about summary refs log tree commit diff
diff options
context:
space:
mode:
authorWinter <winter@winter.cafe>2022-11-13 22:14:46 -0500
committerWinter <winter@winter.cafe>2022-11-21 15:00:08 -0500
commit009a234bdd91d91008e95c7e8b9de16439114635 (patch)
treed8b43d7f24fde7aae2fdf7ccb51b18dec8600530
parent091d039b12a485bd3b677d862da5b35da4b19f36 (diff)
prefetch-npm-deps: repack hosted git deps
Previously, we stored the tarballs from the hosted Git providers directly in the cache. However, as we've seen with `fetchFromGitHub` etc., these tarballs may change subtly.

To guard against this, this commit repacks the dependencies deterministically before storing them in the cache.
-rw-r--r--pkgs/build-support/node/build-npm-package/hooks/default.nix3
-rw-r--r--pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh2
-rw-r--r--pkgs/build-support/node/fetch-npm-deps/default.nix11
-rw-r--r--pkgs/build-support/node/fetch-npm-deps/src/main.rs171
-rw-r--r--pkgs/build-support/node/fetch-npm-deps/src/tests.rs55
5 files changed, 205 insertions, 37 deletions
diff --git a/pkgs/build-support/node/build-npm-package/hooks/default.nix b/pkgs/build-support/node/build-npm-package/hooks/default.nix
index 4ac981af916cf..ff0930426d4e9 100644
--- a/pkgs/build-support/node/build-npm-package/hooks/default.nix
+++ b/pkgs/build-support/node/build-npm-package/hooks/default.nix
@@ -7,10 +7,11 @@
       substitutions = {
         nodeSrc = srcOnly nodejs;
 
-        # Specify the stdenv's `diff` and `jq` by abspath to ensure that the user's build
+        # Specify `diff`, `jq`, and `prefetch-npm-deps` by abspath to ensure that the user's build
         # inputs do not cause us to find the wrong binaries.
         diff = "${buildPackages.diffutils}/bin/diff";
         jq = "${buildPackages.jq}/bin/jq";
+        prefetchNpmDeps = "${buildPackages.prefetch-npm-deps}/bin/prefetch-npm-deps";
 
         nodeVersion = nodejs.version;
         nodeVersionMajor = lib.versions.major nodejs.version;
diff --git a/pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh b/pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh
index 17244cecb2852..6fa6a0f940b1a 100644
--- a/pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh
+++ b/pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh
@@ -56,6 +56,8 @@ npmConfigHook() {
       exit 1
     fi
 
+    @prefetchNpmDeps@ --fixup-lockfile "$srcLockfile"
+
     local cachePath
 
     if [ -z "${makeCacheWritable-}" ]; then
diff --git a/pkgs/build-support/node/fetch-npm-deps/default.nix b/pkgs/build-support/node/fetch-npm-deps/default.nix
index d1e847c8a12e4..7d5ea7cbfbe8f 100644
--- a/pkgs/build-support/node/fetch-npm-deps/default.nix
+++ b/pkgs/build-support/node/fetch-npm-deps/default.nix
@@ -1,4 +1,4 @@
-{ lib, stdenvNoCC, rustPlatform, Security, testers, fetchurl, prefetch-npm-deps, fetchNpmDeps }:
+{ lib, stdenvNoCC, rustPlatform, makeWrapper, Security, gnutar, gzip, testers, fetchurl, prefetch-npm-deps, fetchNpmDeps }:
 
 {
   prefetch-npm-deps = rustPlatform.buildRustPackage {
@@ -16,8 +16,13 @@
 
     cargoLock.lockFile = ./Cargo.lock;
 
+    nativeBuildInputs = [ makeWrapper ];
     buildInputs = lib.optional stdenvNoCC.isDarwin Security;
 
+    postInstall = ''
+      wrapProgram "$out/bin/prefetch-npm-deps" --prefix PATH : ${lib.makeBinPath [ gnutar gzip ]}
+    '';
+
     passthru.tests =
       let
         makeTestSrc = { name, src }: stdenvNoCC.mkDerivation {
@@ -79,7 +84,7 @@
             hash = "sha256-X9mCwPqV5yP0S2GonNvpYnLSLJMd/SUIked+hMRxDpA=";
           };
 
-          hash = "sha256-ri8qvYjn420ykmCC2Uy5P3jxVVrKWJG3ug/qLIGcR7o=";
+          hash = "sha256-5Mg7KDJLMM5e/7BCHGinGAnBRft2ySQzvKW06p3u/0o=";
         };
 
         linkDependencies = makeTest {
@@ -102,7 +107,7 @@
             hash = "sha256-1fGNxYJi1I4cXK/jinNG+Y6tPEOhP3QAqWOBEQttS9E=";
           };
 
-          hash = "sha256-73rLcSBgsZRJFELaKK++62hVbt1QT8JgLu2hyDSmIZE=";
+          hash = "sha256-8xF8F74nHwL9KPN2QLsxnfvsk0rNCKOZniYJQCD5u/I=";
         };
       };
 
diff --git a/pkgs/build-support/node/fetch-npm-deps/src/main.rs b/pkgs/build-support/node/fetch-npm-deps/src/main.rs
index 7189969b84d56..cf9651d42d649 100644
--- a/pkgs/build-support/node/fetch-npm-deps/src/main.rs
+++ b/pkgs/build-support/node/fetch-npm-deps/src/main.rs
@@ -4,11 +4,12 @@ use crate::cacache::Cache;
 use anyhow::{anyhow, Context};
 use rayon::prelude::*;
 use serde::Deserialize;
+use serde_json::{Map, Value};
 use std::{
     collections::{HashMap, HashSet},
-    env, fmt, fs,
+    env, fmt, fs, io,
     path::Path,
-    process::{self, Command},
+    process::{self, Command, Stdio},
 };
 use tempfile::tempdir;
 use url::Url;
@@ -245,6 +246,55 @@ fn get_initial_url() -> anyhow::Result<Url> {
     Url::parse("git+ssh://git@a.b").context("initial url should be valid")
 }
 
+/// `fixup_lockfile` removes the `integrity` field from Git dependencies.
+///
+/// Git dependencies from specific providers can be retrieved from those providers' automatic tarball features.
+/// When these dependencies are specified with a commit identifier, npm generates a tarball, and inserts the integrity hash of that
+/// tarball into the lockfile.
+///
+/// Thus, we remove this hash, to replace it with our own deterministic copies of dependencies from hosted Git providers.
+fn fixup_lockfile(mut lock: Map<String, Value>) -> anyhow::Result<Option<Map<String, Value>>> {
+    if lock
+        .get("lockfileVersion")
+        .ok_or_else(|| anyhow!("couldn't get lockfile version"))?
+        .as_i64()
+        .ok_or_else(|| anyhow!("lockfile version isn't an int"))?
+        < 2
+    {
+        return Ok(None);
+    }
+
+    let mut fixed = false;
+
+    for package in lock
+        .get_mut("packages")
+        .ok_or_else(|| anyhow!("couldn't get packages"))?
+        .as_object_mut()
+        .ok_or_else(|| anyhow!("packages isn't a map"))?
+        .values_mut()
+    {
+        if let Some(Value::String(resolved)) = package.get("resolved") {
+            if resolved.starts_with("git+ssh://") && package.get("integrity").is_some() {
+                fixed = true;
+
+                package
+                    .as_object_mut()
+                    .ok_or_else(|| anyhow!("package isn't a map"))?
+                    .remove("integrity");
+            }
+        }
+    }
+
+    if fixed {
+        lock.remove("dependencies");
+
+        Ok(Some(lock))
+    } else {
+        Ok(None)
+    }
+}
+
+#[allow(clippy::too_many_lines)]
 fn main() -> anyhow::Result<()> {
     let args = env::args().collect::<Vec<_>>();
 
@@ -256,6 +306,18 @@ fn main() -> anyhow::Result<()> {
         process::exit(1);
     }
 
+    if args[1] == "--fixup-lockfile" {
+        let lock = serde_json::from_str(&fs::read_to_string(&args[2])?)?;
+
+        if let Some(fixed) = fixup_lockfile(lock)? {
+            println!("Fixing lockfile");
+
+            fs::write(&args[2], serde_json::to_string(&fixed)?)?;
+        }
+
+        return Ok(());
+    }
+
     let lock_content = fs::read_to_string(&args[1])?;
     let lock: PackageLock = serde_json::from_str(&lock_content)?;
 
@@ -310,40 +372,87 @@ fn main() -> anyhow::Result<()> {
 
     let cache = Cache::new(out.join("_cacache"));
 
-    packages.into_par_iter().try_for_each(|(dep, package)| {
-        eprintln!("{dep}");
+    packages
+        .into_par_iter()
+        .try_for_each(|(dep, mut package)| {
+            eprintln!("{dep}");
 
-        let mut resolved = match package.resolved {
-            Some(UrlOrString::Url(url)) => url,
-            _ => unreachable!(),
-        };
+            let mut resolved = match package.resolved {
+                Some(UrlOrString::Url(url)) => url,
+                _ => unreachable!(),
+            };
 
-        if let Some(hosted_git_url) = get_hosted_git_url(&resolved) {
-            resolved = hosted_git_url;
-        }
+            let mut hosted = false;
 
-        let mut data = Vec::new();
+            if let Some(hosted_git_url) = get_hosted_git_url(&resolved) {
+                resolved = hosted_git_url;
+                package.integrity = None;
+                hosted = true;
+            }
 
-        agent
-            .get(resolved.as_str())
-            .call()?
-            .into_reader()
-            .read_to_end(&mut data)?;
+            let mut data = Vec::new();
 
-        cache
-            .put(
-                format!("make-fetch-happen:request-cache:{resolved}"),
-                resolved,
-                &data,
-                package
-                    .integrity
-                    .map(|i| Ok::<String, anyhow::Error>(get_ideal_hash(&i)?.to_string()))
-                    .transpose()?,
-            )
-            .map_err(|e| anyhow!("couldn't insert cache entry for {dep}: {e:?}"))?;
-
-        Ok::<_, anyhow::Error>(())
-    })?;
+            let mut body = agent.get(resolved.as_str()).call()?.into_reader();
+
+            if hosted {
+                let workdir = tempdir()?;
+
+                let tar_path = workdir.path().join("package");
+
+                fs::create_dir(&tar_path)?;
+
+                let mut cmd = Command::new("tar")
+                    .args(["--extract", "--gzip", "--strip-components=1", "-C"])
+                    .arg(&tar_path)
+                    .stdin(Stdio::piped())
+                    .spawn()?;
+
+                io::copy(&mut body, &mut cmd.stdin.take().unwrap())?;
+
+                let exit = cmd.wait()?;
+
+                if !exit.success() {
+                    return Err(anyhow!(
+                        "failed to extract tarball for {dep}: tar exited with status code {}",
+                        exit.code().unwrap()
+                    ));
+                }
+
+                data = Command::new("tar")
+                    .args([
+                        "--sort=name",
+                        "--mtime=0",
+                        "--owner=0",
+                        "--group=0",
+                        "--numeric-owner",
+                        "--format=gnu",
+                        "-I",
+                        "gzip -n -9",
+                        "--create",
+                        "-C",
+                    ])
+                    .arg(workdir.path())
+                    .arg("package")
+                    .output()?
+                    .stdout;
+            } else {
+                body.read_to_end(&mut data)?;
+            }
+
+            cache
+                .put(
+                    format!("make-fetch-happen:request-cache:{resolved}"),
+                    resolved,
+                    &data,
+                    package
+                        .integrity
+                        .map(|i| Ok::<String, anyhow::Error>(get_ideal_hash(&i)?.to_string()))
+                        .transpose()?,
+                )
+                .map_err(|e| anyhow!("couldn't insert cache entry for {dep}: {e:?}"))?;
+
+            Ok::<_, anyhow::Error>(())
+        })?;
 
     fs::write(out.join("package-lock.json"), lock_content)?;
 
diff --git a/pkgs/build-support/node/fetch-npm-deps/src/tests.rs b/pkgs/build-support/node/fetch-npm-deps/src/tests.rs
index 99e091cbc2c2e..a3317207c42e4 100644
--- a/pkgs/build-support/node/fetch-npm-deps/src/tests.rs
+++ b/pkgs/build-support/node/fetch-npm-deps/src/tests.rs
@@ -1,7 +1,8 @@
 use super::{
-    get_hosted_git_url, get_ideal_hash, get_initial_url, to_new_packages, OldPackage, Package,
-    UrlOrString,
+    fixup_lockfile, get_hosted_git_url, get_ideal_hash, get_initial_url, to_new_packages,
+    OldPackage, Package, UrlOrString,
 };
+use serde_json::json;
 use std::collections::HashMap;
 use url::Url;
 
@@ -88,3 +89,53 @@ fn git_shorthand_v1() -> anyhow::Result<()> {
 
     Ok(())
 }
+
+#[test]
+fn lockfile_fixup() -> anyhow::Result<()> {
+    let input = json!({
+        "lockfileVersion": 2,
+        "name": "foo",
+        "packages": {
+            "": {
+
+            },
+            "foo": {
+                "resolved": "https://github.com/NixOS/nixpkgs",
+                "integrity": "aaa"
+            },
+            "bar": {
+                "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
+                "integrity": "bbb"
+            }
+        }
+    });
+
+    let expected = json!({
+        "lockfileVersion": 2,
+        "name": "foo",
+        "packages": {
+            "": {
+
+            },
+            "foo": {
+                "resolved": "https://github.com/NixOS/nixpkgs",
+                "integrity": "aaa"
+            },
+            "bar": {
+                "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
+            }
+        }
+    });
+
+    assert_eq!(
+        fixup_lockfile(input.as_object().unwrap().clone())?,
+        Some(expected.as_object().unwrap().clone())
+    );
+
+    assert_eq!(
+        fixup_lockfile(json!({"lockfileVersion": 1}).as_object().unwrap().clone())?,
+        None
+    );
+
+    Ok(())
+}