diff options
author | nicoo <nicoo@mur.at> | 2023-12-18 18:37:16 +0000 |
---|---|---|
committer | nicoo <nicoo@mur.at> | 2023-12-18 20:28:51 +0000 |
commit | b1a96bbbf22c2e375080773a366f6e9dbf7cd3ba (patch) | |
tree | badf5a892b0ef412218c3ecee2595ffb8ddc9624 /doc/tests | |
parent | 55520e0602239cf57799cc5eebcac2712b5728eb (diff) |
doc: Add test for broken links in `manpage-urls.json`
Diffstat (limited to 'doc/tests')
-rwxr-xr-x | doc/tests/manpage-urls.py | 107 |
1 files changed, 107 insertions, 0 deletions
diff --git a/doc/tests/manpage-urls.py b/doc/tests/manpage-urls.py new file mode 100755 index 0000000000000..e5242892b7fb9 --- /dev/null +++ b/doc/tests/manpage-urls.py @@ -0,0 +1,107 @@ +#! /usr/bin/env nix-shell +#! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])" + +from argparse import ArgumentParser +from collections import defaultdict +from enum import IntEnum +from http import HTTPStatus +from pathlib import Path +import asyncio, json, logging + +import aiohttp, structlog +from structlog.contextvars import bound_contextvars as log_context + + +LogLevel = IntEnum('LogLevel', { + lvl: getattr(logging, lvl) + for lvl in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL') +}) +LogLevel.__str__ = lambda self: self.name + + +EXPECTED_STATUS=frozenset(( + HTTPStatus.OK, HTTPStatus.FOUND, + HTTPStatus.NOT_FOUND, +)) + +async def check(session, manpage: str, url: str) -> HTTPStatus: + with log_context(manpage=manpage, url=url): + logger.debug("Checking") + async with session.head(url) as resp: + st = HTTPStatus(resp.status) + match st: + case HTTPStatus.OK | HTTPStatus.FOUND: + logger.debug("OK!") + case HTTPStatus.NOT_FOUND: + logger.error("Broken link!") + case _ if st < 400: + logger.info("Unexpected code", status=st) + case _ if 400 <= st < 600: + logger.warn("Unexpected error", status=st) + + return st + +async def main(urls_path): + logger.info(f"Parsing {urls_path}") + with urls_path.open() as urls_file: + urls = json.load(urls_file) + + count = defaultdict(lambda: 0) + + logger.info(f"Checking URLs from {urls_path}") + async with aiohttp.ClientSession() as session: + for status in asyncio.as_completed([ + check(session, manpage, url) + for manpage, url in urls.items() + ]): + count[await status]+=1 + + ok = count[HTTPStatus.OK] + count[HTTPStatus.FOUND] + broken = count[HTTPStatus.NOT_FOUND] + unknown = sum(c for st, c in count.items() if st not in EXPECTED_STATUS) + logger.info(f"Done: {broken} broken links, " + f"{ok} correct links, and {unknown} unexpected status") + + return count + + +def parse_args(args=None): + parser = ArgumentParser( + prog = 'check-manpage-urls', + description = 'Check the validity of the manpage URLs linked in the nixpkgs manual', + ) + parser.add_argument( + '-l', '--log-level', + default = os.getenv('LOG_LEVEL', 'INFO'), + type = lambda s: LogLevel[s], + choices = list(LogLevel), + ) + parser.add_argument( + 'file', + type = Path, + nargs = '?', + ) + + return parser.parse_args(args) + + +if __name__ == "__main__": + import os, sys + + args = parse_args() + + structlog.configure( + wrapper_class=structlog.make_filtering_bound_logger(args.log_level), + ) + logger = structlog.getLogger("check-manpage-urls.py") + + urls_path = args.file + if urls_path is None: + REPO_ROOT = Path(__file__).parent.parent.parent.parent + logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}") + + urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json' + + count = asyncio.run(main(urls_path)) + + sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1) |