about summary refs log tree commit diff
path: root/pkgs/aszlig
diff options
context:
space:
mode:
authoraszlig <aszlig@nix.build>2021-07-06 05:10:35 +0200
committeraszlig <aszlig@nix.build>2021-07-09 17:48:22 +0200
commitbf04f3d07c11132f4cb6cf288f5c5fec4a823501 (patch)
tree091d59108ddbe7efefb2460d0e8a2e60768f7f7f /pkgs/aszlig
parent5f6dd9bad408accbccc6b0cb1174ac9a29959876 (diff)
vim: Add highlighting for Markdown code blocks
While I could have done this simply by setting the
g:markdown_fenced_languages variable, I instead decided it would be a
better idea to use the same language names that GitHub recognises via
their GitHub Flavored Markdown syntax.

Since they're using Linguist, I decided to simply import the YAML file
and try to match them against existing Vim syntax files. That way,
we only need to maintain a blacklist of languages we do not want and
should pretty much get highlighting for all supported languages.

Unfortunately, the "markdown.vim" syntax file sources all of the syntax
files for these languages and so the more languages we include there,
the slower it gets when opening a Markdown file.

Right now, I mostly use this for editing textareas, so let's see how
annoying the slower load time will get and blacklist more languages
later if it bugs me too much.

Signed-off-by: aszlig <aszlig@nix.build>
Diffstat (limited to 'pkgs/aszlig')
-rw-r--r--pkgs/aszlig/vim/default.nix117
1 files changed, 116 insertions, 1 deletions
diff --git a/pkgs/aszlig/vim/default.nix b/pkgs/aszlig/vim/default.nix
index 2d871c9a..cd1c6064 100644
--- a/pkgs/aszlig/vim/default.nix
+++ b/pkgs/aszlig/vim/default.nix
@@ -1,5 +1,5 @@
 { stdenv, lib, fetchurl, fetchFromGitHub, writeText, writeTextFile, writeScript
-, python3Packages, ledger, meson, vim
+, runCommand, writers, python3Packages, ledger, meson, vim
 }:
 
 let
@@ -410,6 +410,118 @@ let
     set history=500
   '';
 
+  vimLanguages = runCommand "vim-languages.json" {
+    inherit vim;
+    plugins = lib.attrValues plugins;
+  } ''
+    result=
+
+    add_synfiles() {
+      for synfile in "$@"; do
+        synbase="$(basename "$synfile")"
+        result="$result''${result:+,}\"''${synbase%.vim}\""
+      done
+    }
+
+    add_synfiles "$vim"/share/vim/vim[0-9]*/syntax/*.vim
+    for plugin in $plugins; do
+      add_synfiles "$plugin"/syntax/*.vim
+    done
+
+    echo "[$result]" > "$out"
+  '';
+
+  githubLanguages = let
+    rev = "16c70aef8cd62ca071231a380c69050f5e83c900";
+    sha256 = "1bzxvf9128dzwc3n6hamvqld1idmagx0mfw8ydg4v7789yywppjj";
+    file = "lib/linguist/languages.yml";
+    url = "https://github.com/github/linguist/raw/${rev}/${file}";
+  in fetchurl { inherit url sha256; };
+
+  blacklistedLanguages = [
+    # Support file rather than real syntax files, which we never ever want to
+    # include directly.
+    "syncolor" "synload" "syntax"
+
+    # Those have poor runtime performance because they include other syntax
+    # files. While there are certainly others, I excluded the following syntax
+    # files because I find them highly unlikely to occur in code blocks from
+    # Markdown files I care about.
+    "2html" "ant" "antlr" "groovy" "gsp" "haml" "javacc" "jinja" "jsp" "less"
+    "lucius" "phtml" "rpcgen" "sass" "scss" "smarty" "sqlj" "vim" "vue" "xs"
+
+    # Cause conflicts because they execute "syn sync linecont".
+    "fortran" "rexx" "sicad"
+
+    # These syntax files recursively include markdown.vim syntax file.
+    "lhaskell" "markdown" "pug" "rmd"
+
+    # Other markup languages I think are pretty unlikely to be included in
+    # Markdown code blocks.
+    "asciidoc" "docbk" "docbksgml" "docbkxml" "doxygen" "groff" "nroff" "rst"
+    "rtf"
+  ];
+
+  transformer = writers.writePython3Bin "transform-languages" {
+    libraries = lib.singleton python3Packages.pyyaml;
+    flakeIgnore = [ "E111" "E114" "E501" ];
+  } ''
+    import json
+    import sys
+    import yaml
+
+    blacklisted_languages = {${
+      lib.concatMapStringsSep ", " (l: "'${l}'") blacklistedLanguages
+    }}
+
+    with open(sys.argv[1]) as fp:
+      vim_languages = set(json.load(fp)) - blacklisted_languages
+
+    # This generate language canidates/aliases according to
+    # https://git.io/Jcioq with the keys being the corresponding Vim syntax
+    # file name and the values being a set of all the aliases we can use for
+    # GitHub Flavored Markdown.
+    languages = {}
+    with open(sys.argv[2]) as fp:
+      for name, lang in yaml.load(fp, Loader=yaml.CLoader).items():
+        if lang['tm_scope'] == 'none':
+          continue
+
+        aliases = lang.get('aliases', [])
+        interpreters = lang.get('interpreters', [])
+        # The Vim variant doesn't support a dot prefix to the language name.
+        extensions = [ext.lstrip('.') for ext in lang.get('extensions', [])]
+
+        raw_canidates = [name] + aliases + interpreters + extensions
+        # Note that this is not a set (yet), because order is important here,
+        # going from the most specific ones to the least specific ones.
+        canidates = [c.replace(' ', '-').lower() for c in raw_canidates]
+
+        for canidate in canidates:
+          if canidate in vim_languages:
+            # At this point we *do* want to make sure it's a set because we
+            # only want to provide the aliases once.
+            languages[canidate] = set(canidates)
+            break
+
+    # This is for getting the canidates/aliases into the format expected by
+    # g:markdown_fenced_languages, which either is the syntax file directly or
+    # some aliased mapping like "bash=sh".
+    fenced_langs = []
+    for name, canidates in languages.items():
+      fenced_langs += [c if c == name else f'{c}={name}' for c in canidates]
+
+    with open(sys.argv[3], 'w') as fp:
+      escaped = ["'" + val.replace("'", "'''") + "'" for val in fenced_langs]
+      vim_fenced_langs = '[' + ', '.join(escaped) + ']'
+      fp.write('let g:markdown_fenced_languages = ' + vim_fenced_langs + "\n")
+  '';
+
+  vimMarkdownLanguages = runCommand "markdown-languages.vim" {
+    inherit vimLanguages githubLanguages;
+    nativeBuildInputs = lib.singleton transformer;
+  } "transform-languages \"$vimLanguages\" \"$githubLanguages\" \"$out\"";
+
   plugin = ''
     " erlang
     let erlang_folding = 0
@@ -438,6 +550,9 @@ let
     let php_baselib = 1
     let php_htmlInStrings = 1
     let g:PHP_vintage_case_default_indent = 1
+
+    " markdown
+    source ${vimMarkdownLanguages}
   '';
 
   autocmd = ''