#! /usr/bin/env nix-shell
#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml
"""
Pandoc will strip any markup within code elements so
let’s escape them so that they can be handled manually.
"""
import lxml.etree as ET
import re
import sys
def replace_element_by_text(el: ET.Element, text: str) -> None:
"""
Author: bernulf
Source: https://stackoverflow.com/a/10520552/160386
SPDX-License-Identifier: CC-BY-SA-3.0
"""
text = text + (el.tail or "")
parent = el.getparent()
if parent is not None:
previous = el.getprevious()
if previous is not None:
previous.tail = (previous.tail or "") + text
else:
parent.text = (parent.text or "") + text
parent.remove(el)
DOCBOOK_NS = "http://docbook.org/ns/docbook"
# List of elements that pandoc’s DocBook reader strips markup from.
# https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Readers/DocBook.hs
code_elements = [
# CodeBlock
"literallayout",
"screen",
"programlisting",
# Code (inline)
"classname",
"code",
"filename",
"envar",
"literal",
"computeroutput",
"prompt",
"parameter",
"option",
"markup",
"wordasword",
"command",
"varname",
"function",
"type",
"symbol",
"constant",
"userinput",
"systemitem",
]
XMLNS_REGEX = re.compile(r'\s+xmlns(?::[^=]+)?="[^"]*"')
ROOT_ELEMENT_REGEX = re.compile(r'^\s*<[^>]+>')
def remove_xmlns(match: re.Match) -> str:
"""
Removes xmlns attributes.
Expects a match containing an opening tag.
"""
return XMLNS_REGEX.sub('', match.group(0))
if __name__ == '__main__':
assert len(sys.argv) >= 3, "usage: escape-code-markup.py