about summary refs log tree commit diff
path: root/pkgs
diff options
context:
space:
mode:
author Profpatsch <mail@profpatsch.de> 2022-09-07 01:44:35 +0200
committer Profpatsch <mail@profpatsch.de> 2022-11-04 18:14:33 +0100
commit 26cb66b681f5df87c431868de4459103bb7446c4 (patch)
tree deddb2598b0439697d77b176019c98a4e6b5b1fb /pkgs
parent 805b5e978d6aff39429e9ebcbc0a6fc2df1a893a (diff)
tree-sitter/update: Fetch the existing repos from python as well
I also took the opportunity to add some type annotations and to make sure
that mypy accepts them.
Diffstat (limited to 'pkgs')
-rw-r--r-- pkgs/development/tools/parsing/tree-sitter/mypy.ini | 11
-rw-r--r-- pkgs/development/tools/parsing/tree-sitter/update.nix | 29
-rw-r--r-- pkgs/development/tools/parsing/tree-sitter/update_impl.py | 117
3 files changed, 94 insertions, 63 deletions
diff --git a/pkgs/development/tools/parsing/tree-sitter/mypy.ini b/pkgs/development/tools/parsing/tree-sitter/mypy.ini
new file mode 100644
index 0000000000000..3a34ffac26ed5
--- /dev/null
+++ b/pkgs/development/tools/parsing/tree-sitter/mypy.ini
@@ -0,0 +1,11 @@
+[mypy]
+disallow_untyped_defs = true
+disallow_subclassing_any = true
+
+no_implicit_optional = true
+
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_no_return = true
+warn_return_any = true
+warn_unreachable = true
diff --git a/pkgs/development/tools/parsing/tree-sitter/update.nix b/pkgs/development/tools/parsing/tree-sitter/update.nix
index 9655dd0f0ef76..fb21f16d0657d 100644
--- a/pkgs/development/tools/parsing/tree-sitter/update.nix
+++ b/pkgs/development/tools/parsing/tree-sitter/update.nix
@@ -404,9 +404,6 @@ let
     fi
   '';
 
-  # TODO
-  urlEscape = x: x;
-
   # implementation of the fetching of repo information from github
   fetchImpl = writeShellScript "fetchImpl-wrapped" ''
     env ARGLIB_JSON=${lib.escapeShellArg (lib.generators.toJSON {} {
@@ -418,30 +415,6 @@ let
         } ./update_impl.py} "$@"
   '';
 
-  # find the latest repos of a github organization
-  latestGithubRepos = { orga }: writeShellScript "latest-github-repos" ''
-    set -euo pipefail
-
-    args=( '--silent' )
-    if [ -n "''${GITHUB_TOKEN:-}" ]; then
-      args+=( "-H" "Authorization: token ''${GITHUB_TOKEN}" )
-    fi
-    args+=( 'https://api.github.com/orgs/${urlEscape orga}/repos?per_page=100' )
-
-    res=$(${curl}/bin/curl "''${args[@]}")
-
-    if [[ "$(printf "%s" "$res" | ${jq}/bin/jq '.message?')" =~ "rate limit" ]]; then
-      echo "rate limited" >&2
-      exit 1
-    elif [[ "$(printf "%s" "$res" | ${jq}/bin/jq '.message?')" =~ "Bad credentials" ]]; then
-      echo "bad credentials" >&2
-      exit 1
-    fi
-
-    printf "%s" "$res" | ${jq}/bin/jq 'map(.name)' \
-      || echo "failed $res"
-  '';
-
   foreachSh = attrs: f:
     lib.concatMapStringsSep "\n" f
       (lib.mapAttrsToList (k: v: { name = k; } // v) attrs);
@@ -449,7 +422,7 @@ let
   update-all-grammars = writeShellScript "update-all-grammars.sh" ''
     set -euo pipefail
     echo "fetching list of grammars" 1>&2
-    treeSitterRepos=$(${latestGithubRepos { orga = "tree-sitter"; }})
+    treeSitterRepos=$(${fetchImpl} fetch-orga-latest-repos '{"orga": "tree-sitter"}')
     echo "checking the tree-sitter repo list against the grammars we know" 1>&2
     printf '%s' "$treeSitterRepos" | ${checkTreeSitterRepos}
     outputDir="${toString ./.}/grammars"
diff --git a/pkgs/development/tools/parsing/tree-sitter/update_impl.py b/pkgs/development/tools/parsing/tree-sitter/update_impl.py
index 37378feea6beb..026f2b93b7f28 100644
--- a/pkgs/development/tools/parsing/tree-sitter/update_impl.py
+++ b/pkgs/development/tools/parsing/tree-sitter/update_impl.py
@@ -3,17 +3,20 @@ import json
 import subprocess as sub
 import os
 import sys
+from typing import Generator, Any, Literal
 
-debug = True if os.environ.get("DEBUG", False) else False
-bins = json.loads(os.environ['ARGLIB_JSON'])
+debug: bool = True if os.environ.get("DEBUG", False) else False
+Bin = str
+bins: dict[str, Bin] = json.loads(os.environ['ARGLIB_JSON'])
 
-mode = sys.argv[1]
-jsonArg = json.loads(sys.argv[2])
+mode: str = sys.argv[1]
+jsonArg: dict = json.loads(sys.argv[2])
 
+Args = Generator[str, None, None]
 
-def curl_github_args(token, url):
+
+def curl_github_args(token: str | None, url: str) -> Args:
     """Query the github API via curl"""
-    yield bins["curl"]
     if not debug:
         yield "--silent"
     if token:
@@ -22,22 +25,22 @@ def curl_github_args(token, url):
     yield url
 
 
-def curl_result(orga, repo, output):
+def curl_result(output: bytes) -> Any | Literal["not found"]:
     """Parse the curl result of the github API"""
-    res = json.loads(output)
-    message = res.get("message", "")
-    if "rate limit" in message:
-        sys.exit("Rate limited by the Github API")
-    if "Not Found" in message:
-        # repository not there or no releases; if the repo is missing,
-        # we’ll notice when we try to clone it
-        return {}
+    res: Any = json.loads(output)
+    match res:
+        case dict(res):
+            message: str = res.get("message", "")
+            if "rate limit" in message:
+                sys.exit("Rate limited by the Github API")
+            if "Not Found" in message:
+                return "not found"
+    # if the result is another type, we can pass it on
     return res
 
 
-def nix_prefetch_args(url, version_rev):
+def nix_prefetch_git_args(url: str, version_rev: str) -> Args:
     """Prefetch a git repository"""
-    yield bins["nix-prefetch-git"]
     if not debug:
         yield "--quiet"
     yield "--no-deepClone"
@@ -47,38 +50,82 @@ def nix_prefetch_args(url, version_rev):
     yield version_rev
 
 
-def fetchRepo():
+def run_bin(cmd: str, args: Args) -> bytes:
+    bin: Bin = bins[cmd]
+    all = [bin] + list(args)
+    if debug:
+        print(all, file=sys.stderr)
+    return sub.check_output(all)
+
+
+def fetchRepo() -> None:
     """fetch the given repo and print its nix-prefetch output to stdout"""
     match jsonArg:
         case {"orga": orga, "repo": repo}:
-            token = os.environ.get("GITHUB_TOKEN", None)
-            curl_cmd = list(curl_github_args(
-                token,
-                url=f"https://api.github.com/repos/{quote(orga)}/{quote(repo)}/releases/latest"
-            ))
-            if debug:
-                print(curl_cmd, file=sys.stderr)
-            out = sub.check_output(curl_cmd)
-            release = curl_result(orga, repo, out).get("tag_name", None)
-
-            # github sometimes returns an empty list even tough there are releases
-            if not release:
-                print(f"uh-oh, latest for {orga}/{repo} is not there, using HEAD", file=sys.stderr)
-                release = "HEAD"
+            token: str | None = os.environ.get("GITHUB_TOKEN", None)
+            out = run_bin(
+                "curl",
+                curl_github_args(
+                    token,
+                    url=f"https://api.github.com/repos/{quote(orga)}/{quote(repo)}/releases/latest"
+                )
+            )
+            release: str
+            match curl_result(out):
+                case "not found":
+                    # github sometimes returns an empty list even though there are releases
+                    print(f"uh-oh, latest for {orga}/{repo} is not there, using HEAD", file=sys.stderr)
+                    release = "HEAD"
+                case {"tag_name": tag_name}:
+                    release = tag_name
+                case _:
+                    sys.exit("git result did not have a `tag_name` field")
 
             print(f"Fetching latest release ({release}) of {orga}/{repo} …", file=sys.stderr)
-            sub.check_call(
-                list(nix_prefetch_args(
+            res = run_bin(
+                "nix-prefetch-git",
+                nix_prefetch_git_args(
                     url=f"https://github.com/{quote(orga)}/{quote(repo)}",
                     version_rev=release
-                ))
+                )
             )
+            sys.stdout.buffer.write(res)
         case _:
             sys.exit("input json must have `orga` and `repo` keys")
 
 
+def fetchOrgaLatestRepos() -> None:
+    """fetch the latest (100) repos from the given github organization"""
+    match jsonArg:
+        case {"orga": orga}:
+            token: str | None = os.environ.get("GITHUB_TOKEN", None)
+            out = run_bin(
+                "curl",
+                curl_github_args(
+                    token,
+                    url=f"https://api.github.com/orgs/{quote(orga)}/repos?per_page=100"
+                )
+            )
+            match curl_result(out):
+                case "not found":
+                    sys.exit(f"github organization {orga} not found")
+                case list(repos):
+                    res: list[str] = []
+                    for repo in repos:
+                        name = repo.get("name")
+                        if name:
+                            res.append(name)
+                    json.dump(res, sys.stdout)
+                case _:
+                    sys.exit("github result was not a list of repos")
+        case _:
+            sys.exit("input json must have `orga` key")
+
+
 match mode:
     case "fetch-repo":
         fetchRepo()
+    case "fetch-orga-latest-repos":
+        fetchOrgaLatestRepos()
     case _:
         sys.exit(f"mode {mode} unknown")