summary refs log tree commit diff
path: root/pkgs/development/tools/parsing
diff options
context:
space:
mode:
author Profpatsch <mail@profpatsch.de> 2022-09-07 02:55:18 +0200
committer Profpatsch <mail@profpatsch.de> 2022-11-04 18:14:33 +0100
commit 2de554d51215f318b992028ba569ea546bb57306 (patch)
tree e9af6f9b50132a99a9e35614121ae1531710598f /pkgs/development/tools/parsing
parent a64a9d5552a62627ccb562c6f5ffa9c6312726e1 (diff)
tree-sitter/update: Fetch repositories in parallel
This was the main annoyance with the existing script: it would fetch
one repository after the other instead of several in parallel.
The `xe` tool is a simple `xargs`-like utility that can run jobs in parallel.

We put the JSON information for all repositories into a file with one
object per line, then pass that file to `xe`.
Diffstat (limited to 'pkgs/development/tools/parsing')
-rw-r--r-- pkgs/development/tools/parsing/tree-sitter/update.nix 48
-rw-r--r-- pkgs/development/tools/parsing/tree-sitter/update_impl.py 4
2 files changed, 34 insertions, 18 deletions
diff --git a/pkgs/development/tools/parsing/tree-sitter/update.nix b/pkgs/development/tools/parsing/tree-sitter/update.nix
index 97e68b9814142..ccb81a3f94b6b 100644
--- a/pkgs/development/tools/parsing/tree-sitter/update.nix
+++ b/pkgs/development/tools/parsing/tree-sitter/update.nix
@@ -419,29 +419,43 @@ let
     lib.concatMapStringsSep "\n" f
       (lib.mapAttrsToList (k: v: { name = k; } // v) attrs);
 
+  jsonNewlines = lib.concatMapStringsSep "\n" (lib.generators.toJSON {});
+
+  # Run the given script for each of the attr list.
+  # The attrs are passed to the script as a json value.
+  forEachParallel = name: script: listOfAttrs: writeShellScript "for-each-parallel.sh" ''
+    < ${writeText "${name}.json" (jsonNewlines listOfAttrs)} \
+      ${xe}/bin/xe -F -j5 ${script} {}
+  '';
+
+  outputDir = "${toString ./.}/grammars";
   update-all-grammars = writeShellScript "update-all-grammars.sh" ''
     set -euo pipefail
     echo "fetching list of grammars" 1>&2
     treeSitterRepos=$(${fetchImpl} fetch-orga-latest-repos '{"orga": "tree-sitter"}')
     echo "checking the tree-sitter repo list against the grammars we know" 1>&2
     printf '%s' "$treeSitterRepos" | ${checkTreeSitterRepos}
-    outputDir="${toString ./.}/grammars"
-    echo "writing files to $outputDir" 1>&2
-    mkdir -p "$outputDir"
-    ${foreachSh allGrammars
-      ({name, orga, repo}: ''
-        ${atomically-write} \
-          $outputDir/${name}.json \
-          ${fetchImpl} fetch-repo '${lib.generators.toJSON {} {inherit orga repo;}}'
-      '')}
-    ( echo "{ lib }:"
-      echo "{"
-      ${foreachSh allGrammars
-        ({name, ...}: ''
-           # indentation hack
-             printf "  %s = lib.importJSON ./%s.json;\n" "${name}" "${name}"'')}
-      echo "}" ) \
-      > "$outputDir/default.nix"
+    echo "writing files to ${outputDir}" 1>&2
+    mkdir -p "${outputDir}"
+    ${forEachParallel
+        "repos-to-fetch"
+        (writeShellScript "fetch-repo" ''
+          ${atomically-write} \
+            "${outputDir}/$(jq --raw-output --null-input '$ARGS.positional[0].name' --jsonargs "$1").json" \
+            ${fetchImpl} fetch-repo "$1"
+        '')
+        (lib.mapAttrsToList (name: attrs: attrs // { inherit name; }) allGrammars)
+    }
+    ${atomically-write} \
+      "${outputDir}/default.nix" \
+      ${writeShellScript "print-all-grammars" ''
+          echo "{ lib }:"
+          echo "{"
+          ${foreachSh allGrammars
+            ({name, ...}: ''
+              printf "  %s = lib.importJSON ./%s.json;\n" "${name}" "${name}"'')}
+          echo "}"
+       ''}
   '';
 
   # Atomically write a file (just `>` redirection in bash
diff --git a/pkgs/development/tools/parsing/tree-sitter/update_impl.py b/pkgs/development/tools/parsing/tree-sitter/update_impl.py
index 026f2b93b7f28..23b1e06860503 100644
--- a/pkgs/development/tools/parsing/tree-sitter/update_impl.py
+++ b/pkgs/development/tools/parsing/tree-sitter/update_impl.py
@@ -19,6 +19,8 @@ def curl_github_args(token: str | None, url: str) -> Args:
     """Query the github API via curl"""
     if not debug:
         yield "--silent"
+    # follow redirects
+    yield "--location"
     if token:
         yield "-H"
         yield f"Authorization: token {token}"
@@ -79,7 +81,7 @@ def fetchRepo() -> None:
                 case {"tag_name": tag_name}:
                     release = tag_name
                 case _:
-                    sys.exit("git result did not have a `tag_name` field")
+                    sys.exit(f"git result for {orga}/{repo} did not have a `tag_name` field")
 
             print(f"Fetching latest release ({release}) of {orga}/{repo} …", file=sys.stderr)
             res = run_bin(