diff options
Diffstat (limited to 'pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/manpage.py')
-rw-r--r-- | pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/manpage.py | 330 |
1 files changed, 330 insertions, 0 deletions
diff --git a/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/manpage.py b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/manpage.py new file mode 100644 index 0000000000000..24d0e8f3d699e --- /dev/null +++ b/pkgs/tools/nix/nixos-render-docs/src/nixos_render_docs/manpage.py @@ -0,0 +1,330 @@ +from collections.abc import Mapping, MutableMapping, Sequence +from dataclasses import dataclass +from typing import Any, cast, Iterable, Optional + +import re + +import markdown_it +from markdown_it.token import Token +from markdown_it.utils import OptionsDict + +from .md import Renderer + +# roff(7) says: +# +# > roff documents may contain only graphable 7-bit ASCII characters, the space character, +# > and, in certain circumstances, the tab character. The backslash character ‘\’ indicates +# > the start of an escape sequence […] +# +# mandoc_char(7) says about the `'~^ characters: +# +# > In prose, this automatic substitution is often desirable; but when these characters have +# > to be displayed as plain ASCII characters, for example in source code samples, they require +# > escaping to render as follows: +# +# since we don't want these to be touched anywhere (because markdown will do all substituations +# we want to have) we'll escape those as well. we also escape " (macro metacharacter), - (might +# turn into a typographic hyphen), and . (roff request marker at SOL, changes spacing semantics +# at EOL). groff additionally does not allow unicode escapes for codepoints below U+0080, so +# those need "proper" roff escapes/replacements instead. +_roff_unicode = re.compile(r'''[^\n !#$%&()*+,\-./0-9:;<=>?@A-Z[\\\]_a-z{|}]''', re.ASCII) +_roff_escapes = { + ord('"'): "\\(dq", + ord("'"): "\\(aq", + ord('-'): "\\-", + ord('.'): "\\&.", + ord('\\'): "\\e", + ord('^'): "\\(ha", + ord('`'): "\\(ga", + ord('~'): "\\(ti", +} +def man_escape(s: str) -> str: + s = s.translate(_roff_escapes) + return _roff_unicode.sub(lambda m: f"\\[u{ord(m[0]):04X}]", s) + +# remove leading and trailing spaces from links and condense multiple consecutive spaces +# into a single space for presentation parity with html. this is currently easiest with +# regex postprocessing and some marker characters. since we don't want to drop spaces +# from code blocks we will have to specially protect *inline* code (luckily not block code) +# so normalization can turn the spaces inside it into regular spaces again. +_normalize_space_re = re.compile(r'''\u0000 < *| *>\u0000 |(?<= ) +''') +def _normalize_space(s: str) -> str: + return _normalize_space_re.sub("", s).replace("\0p", " ") + +def _protect_spaces(s: str) -> str: + return s.replace(" ", "\0p") + +@dataclass(kw_only=True) +class List: + width: int + next_idx: Optional[int] = None + compact: bool + first_item_seen: bool = False + +# this renderer assumed that it produces a set of lines as output, and that those lines will +# be pasted as-is into a larger output. no prefixing or suffixing is allowed for correctness. +# +# NOTE that we output exclusively physical markup. this is because we have to use the older +# mandoc(7) format instead of the newer mdoc(7) format due to limitations in groff: while +# using mdoc in groff works fine it is not a native format and thus very slow to render on +# manpages as large as configuration.nix.5. mandoc(1) renders both really quickly, but with +# groff being our predominant manpage viewer we have to optimize for groff instead. +# +# while we do use only physical markup (adjusting indentation with .RS and .RE, adding +# vertical spacing with .sp, \f[BIRP] escapes for bold/italic/roman/$previous font, \h for +# horizontal motion in a line) we do attempt to copy the style of mdoc(7) semantic requests +# as appropriate for each markup element. +class ManpageRenderer(Renderer): + __output__ = "man" + + # whether to emit mdoc .Ql equivalents for inline code or just the contents. this is + # mainly used by the options manpage converter to not emit extra quotes in defaults + # and examples where it's already clear from context that the following text is code. + inline_code_is_quoted: bool = True + link_footnotes: Optional[list[str]] = None + + _href_targets: dict[str, str] + + _link_stack: list[str] + _do_parbreak_stack: list[bool] + _list_stack: list[List] + _font_stack: list[str] + + def __init__(self, manpage_urls: Mapping[str, str], href_targets: dict[str, str], + parser: Optional[markdown_it.MarkdownIt] = None): + super().__init__(manpage_urls, parser) + self._href_targets = href_targets + self._link_stack = [] + self._do_parbreak_stack = [] + self._list_stack = [] + self._font_stack = [] + + def _join_block(self, ls: Iterable[str]) -> str: + return "\n".join([ l for l in ls if len(l) ]) + def _join_inline(self, ls: Iterable[str]) -> str: + return _normalize_space(super()._join_inline(ls)) + + def _enter_block(self) -> None: + self._do_parbreak_stack.append(False) + def _leave_block(self) -> None: + self._do_parbreak_stack.pop() + self._do_parbreak_stack[-1] = True + def _maybe_parbreak(self, suffix: str = "") -> str: + result = f".sp{suffix}" if self._do_parbreak_stack[-1] else "" + self._do_parbreak_stack[-1] = True + return result + + def _admonition_open(self, kind: str) -> str: + self._enter_block() + return ( + '.sp\n' + '.RS 4\n' + f'\\fB{kind}\\fP\n' + '.br' + ) + def _admonition_close(self) -> str: + self._leave_block() + return ".RE" + + def render(self, tokens: Sequence[Token], options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._do_parbreak_stack = [ False ] + self._font_stack = [ "\\fR" ] + return super().render(tokens, options, env) + + def text(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return man_escape(token.content) + def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._maybe_parbreak() + def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return "" + def hardbreak(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return ".br" + def softbreak(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return " " + def code_inline(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + s = _protect_spaces(man_escape(token.content)) + return f"\\fR\\(oq{s}\\(cq\\fP" if self.inline_code_is_quoted else s + def code_block(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self.fence(token, tokens, i, options, env) + def link_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + href = cast(str, token.attrs['href']) + self._link_stack.append(href) + text = "" + if tokens[i + 1].type == 'link_close' and href in self._href_targets: + # TODO error or warning if the target can't be resolved + text = self._href_targets[href] + self._font_stack.append("\\fB") + return f"\\fB{text}\0 <" + def link_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + href = self._link_stack.pop() + text = "" + if self.link_footnotes is not None: + try: + idx = self.link_footnotes.index(href) + 1 + except ValueError: + self.link_footnotes.append(href) + idx = len(self.link_footnotes) + text = "\\fR" + man_escape(f"[{idx}]") + self._font_stack.pop() + return f">\0 {text}{self._font_stack[-1]}" + def list_item_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._enter_block() + lst = self._list_stack[-1] + maybe_space = '' if lst.compact or not lst.first_item_seen else '.sp\n' + lst.first_item_seen = True + head = "•" + if lst.next_idx is not None: + head = f"{lst.next_idx}." + lst.next_idx += 1 + return ( + f'{maybe_space}' + f'.RS {lst.width}\n' + f"\\h'-{len(head) + 1}'\\fB{man_escape(head)}\\fP\\h'1'\\c" + ) + def list_item_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._leave_block() + return ".RE" + def bullet_list_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._list_stack.append(List(width=4, compact=bool(token.meta['compact']))) + return self._maybe_parbreak() + def bullet_list_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._list_stack.pop() + return "" + def em_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._font_stack.append("\\fI") + return "\\fI" + def em_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._font_stack.pop() + return self._font_stack[-1] + def strong_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._font_stack.append("\\fB") + return "\\fB" + def strong_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._font_stack.pop() + return self._font_stack[-1] + def fence(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + s = man_escape(token.content).rstrip('\n') + return ( + '.sp\n' + '.RS 4\n' + '.nf\n' + f'{s}\n' + '.fi\n' + '.RE' + ) + def blockquote_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + maybe_par = self._maybe_parbreak("\n") + self._enter_block() + return ( + f"{maybe_par}" + ".RS 4\n" + f"\\h'-3'\\fI\\(lq\\(rq\\fP\\h'1'\\c" + ) + def blockquote_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._leave_block() + return ".RE" + def note_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_open("Note") + def note_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_close() + def caution_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_open( "Caution") + def caution_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_close() + def important_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_open( "Important") + def important_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_close() + def tip_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_open( "Tip") + def tip_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_close() + def warning_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_open( "Warning") + def warning_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return self._admonition_close() + def dl_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return ".RS 4" + def dl_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return ".RE" + def dt_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return ".PP" + def dt_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return "" + def dd_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._enter_block() + return ".RS 4" + def dd_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._leave_block() + return ".RE" + def myst_role(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + if token.meta['name'] in [ 'command', 'env', 'option' ]: + return f'\\fB{man_escape(token.content)}\\fP' + elif token.meta['name'] in [ 'file', 'var' ]: + return f'\\fI{man_escape(token.content)}\\fP' + elif token.meta['name'] == 'manpage': + [page, section] = [ s.strip() for s in token.content.rsplit('(', 1) ] + section = section[:-1] + return f'\\fB{man_escape(page)}\\fP\\fR({man_escape(section)})\\fP' + else: + raise NotImplementedError("md node not supported yet", token) + def inline_anchor(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + return "" # mdoc knows no anchors + def heading_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + raise RuntimeError("md token not supported in manpages", token) + def heading_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + raise RuntimeError("md token not supported in manpages", token) + def ordered_list_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + # max item head width for a number, a dot, and one leading space and one trailing space + width = 3 + len(str(cast(int, token.meta['end']))) + self._list_stack.append( + List(width = width, + next_idx = cast(int, token.attrs.get('start', 1)), + compact = bool(token.meta['compact']))) + return self._maybe_parbreak() + def ordered_list_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict, + env: MutableMapping[str, Any]) -> str: + self._list_stack.pop() + return "" |