about summary refs log tree commit diff
path: root/pkgs/by-name/nv
diff options
context:
space:
mode:
authorRafael Fernández López <ereslibre@ereslibre.es>2024-02-17 18:16:22 +0100
committerRafael Fernández López <ereslibre@ereslibre.es>2024-02-21 22:17:11 +0100
commit6ac6aab1993eeb0d5da48054ff3d89b666c241e5 (patch)
tree78a988c4b4161c64d6aeb0a870505ddf71670d3a /pkgs/by-name/nv
parent8ba61ebb8acb51bf3d249a10a4671c5aaec7d91d (diff)
nvidia-container-toolkit: move to by-name structure
Diffstat (limited to 'pkgs/by-name/nv')
-rw-r--r--pkgs/by-name/nv/nvidia-container-toolkit/0001-Add-dlopen-discoverer.patch90
-rw-r--r--pkgs/by-name/nv/nvidia-container-toolkit/nvidia-docker.nix32
-rw-r--r--pkgs/by-name/nv/nvidia-container-toolkit/package.nix148
-rw-r--r--pkgs/by-name/nv/nvidia-container-toolkit/packages.nix80
4 files changed, 350 insertions, 0 deletions
diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/0001-Add-dlopen-discoverer.patch b/pkgs/by-name/nv/nvidia-container-toolkit/0001-Add-dlopen-discoverer.patch
new file mode 100644
index 0000000000000..ddc7d34ac7c1b
--- /dev/null
+++ b/pkgs/by-name/nv/nvidia-container-toolkit/0001-Add-dlopen-discoverer.patch
@@ -0,0 +1,90 @@
+From e4449f06a8989ff22947309151855b388c311aed Mon Sep 17 00:00:00 2001
+From: Jared Baur <jaredbaur@fastmail.com>
+Date: Mon, 22 Jan 2024 20:42:48 -0800
+Subject: [PATCH] Add dlopen discoverer
+
+---
+ internal/lookup/dlopen.go  | 57 ++++++++++++++++++++++++++++++++++++++
+ internal/lookup/library.go |  3 ++
+ 2 files changed, 60 insertions(+)
+ create mode 100644 internal/lookup/dlopen.go
+
+diff --git a/internal/lookup/dlopen.go b/internal/lookup/dlopen.go
+new file mode 100644
+index 00000000..7cd84522
+--- /dev/null
++++ b/internal/lookup/dlopen.go
+@@ -0,0 +1,57 @@
++package lookup
++
++// #cgo LDFLAGS: -ldl
++// #define _GNU_SOURCE
++// #include <dlfcn.h>
++// #include <stdlib.h>
++import "C"
++
++import (
++	"fmt"
++	"path/filepath"
++	"unsafe"
++)
++
++// dlopenLocator locates libraries by asking the system's dynamic linker,
++// via dlopen. It embeds file, whose logger Locate uses for debug output.
++type dlopenLocator struct {
++	file
++}
++
++// NewDlopenLocator creates a Locator that finds libraries by resolving them
++// through dlopen; opts are forwarded to the underlying file locator.
++func NewDlopenLocator(opts ...Option) Locator {
++	return &dlopenLocator{
++		file: *newFileLocator(opts...),
++	}
++}
++
++// Locate resolves pattern through the system's dynamic linker using dlopen
++// and returns the library's absolute path (its dlinfo origin directory joined
++// with pattern). Patterns containing wildcards will likely not be found, as
++// library file names rarely contain them. An error is returned on failure.
++func (d dlopenLocator) Locate(pattern string) ([]string, error) {
++	libname := C.CString(pattern)
++	defer C.free(unsafe.Pointer(libname))
++
++	d.logger.Debugf("Calling dlopen for %s", pattern)
++	handle := C.dlopen(libname, C.RTLD_LAZY)
++	if handle == nil {
++		return nil, fmt.Errorf("dlopen %s failed", pattern)
++	}
++	defer C.dlclose(handle)
++
++	// RTLD_DI_ORIGIN copies the origin path into the caller's buffer, so it
++	// must be large enough to hold a full path (4096 is PATH_MAX on Linux).
++	libParentPath := (*C.char)(C.malloc(4096))
++	defer C.free(unsafe.Pointer(libParentPath))
++	d.logger.Debugf("Calling dlinfo on handle for %s", pattern)
++	ret := C.dlinfo(handle, C.RTLD_DI_ORIGIN, unsafe.Pointer(libParentPath))
++	if ret == -1 {
++		return nil, fmt.Errorf("dlinfo on handle for %s failed", pattern)
++	}
++
++	libAbsolutePath := filepath.Join(C.GoString(libParentPath), pattern)
++	d.logger.Debugf("Found library for %s at %s", pattern, libAbsolutePath)
++	return []string{libAbsolutePath}, nil
++}
+diff --git a/internal/lookup/library.go b/internal/lookup/library.go
+index 7f5cf7c8..916edde2 100644
+--- a/internal/lookup/library.go
++++ b/internal/lookup/library.go
+@@ -61,7 +61,10 @@ func NewLibraryLocator(opts ...Option) Locator {
+ 	// We construct a symlink locator for expected library locations.
+ 	symlinkLocator := NewSymlinkLocator(opts...)
+ 
++	dlopenLocator := NewDlopenLocator(opts...)
++
+ 	l := First(
++		dlopenLocator,
+ 		symlinkLocator,
+ 		newLdcacheLocator(opts...),
+ 	)
+--
diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/nvidia-docker.nix b/pkgs/by-name/nv/nvidia-container-toolkit/nvidia-docker.nix
new file mode 100644
index 0000000000000..07dfc3fdb6dc3
--- /dev/null
+++ b/pkgs/by-name/nv/nvidia-container-toolkit/nvidia-docker.nix
@@ -0,0 +1,32 @@
+{ stdenv, lib, fetchFromGitHub, callPackage }:
+stdenv.mkDerivation rec {
+  pname = "nvidia-docker";
+  version = "2.5.0";
+
+  src = fetchFromGitHub {
+    owner = "NVIDIA";
+    repo = pname;
+    rev = "v${version}";
+    sha256 = "1n1k7fnimky67s12p2ycaq9mgk245fchq62vgd7bl3bzfcbg0z4h";
+  };
+
+  buildPhase = ''
+    mkdir bin
+
+    cp nvidia-docker bin
+    substituteInPlace bin/nvidia-docker --subst-var-by VERSION ${version}
+  '';
+
+  installPhase = ''
+    mkdir -p $out/bin
+    cp bin/nvidia-docker $out/bin
+  '';
+
+  meta = with lib; {
+    homepage = "https://github.com/NVIDIA/nvidia-docker";
+    description = "NVIDIA container runtime for Docker";
+    license = licenses.bsd3;
+    platforms = platforms.linux;
+    maintainers = with maintainers; [ cpcloud ];
+  };
+}
diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/package.nix b/pkgs/by-name/nv/nvidia-container-toolkit/package.nix
new file mode 100644
index 0000000000000..a584be35e7b20
--- /dev/null
+++ b/pkgs/by-name/nv/nvidia-container-toolkit/package.nix
@@ -0,0 +1,148 @@
+{ lib
+, glibc
+, fetchFromGitLab
+, makeWrapper
+, buildGoModule
+, linkFarm
+, writeShellScript
+, formats
+, containerRuntimePath ? null
+, configTemplate ? null
+, configTemplatePath ? null
+, libnvidia-container
+, cudaPackages
+}:
+
+assert configTemplate != null -> (lib.isAttrs configTemplate && configTemplatePath == null);
+assert configTemplatePath != null -> (lib.isStringLike configTemplatePath && configTemplate == null);
+
+let
+  isolatedContainerRuntimePath = linkFarm "isolated_container_runtime_path" [
+    {
+      name = "runc";
+      path = containerRuntimePath;
+    }
+  ];
+  warnIfXdgConfigHomeIsSet = writeShellScript "warn_if_xdg_config_home_is_set" ''
+    set -eo pipefail
+
+    if [ -n "$XDG_CONFIG_HOME" ]; then
+      echo >&2 "$(tput setaf 3)warning: \$XDG_CONFIG_HOME=$XDG_CONFIG_HOME$(tput sgr 0)"
+    fi
+  '';
+
+  configToml = if configTemplatePath != null then configTemplatePath else (formats.toml { }).generate "config.toml" configTemplate;
+
+  # From https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/Makefile#L54
+  cliVersionPackage = "github.com/NVIDIA/nvidia-container-toolkit/internal/info";
+in
+buildGoModule rec {
+  pname = "container-toolkit/container-toolkit";
+  version = "1.15.0-rc.3";
+
+  src = fetchFromGitLab {
+    owner = "nvidia";
+    repo = pname;
+    rev = "v${version}";
+    hash = "sha256-IH2OjaLbcKSGG44aggolAOuJkjk+GaXnnTbrXfZ0lVo=";
+
+  };
+
+  vendorHash = null;
+
+  patches = [
+    # This patch causes library lookups to first attempt loading via dlopen
+    # before falling back to the regular symlink location and ldcache location.
+    ./0001-Add-dlopen-discoverer.patch
+  ];
+
+  postPatch = ''
+    # Replace the default hookDefaultFilePath to the $out path and override
+    # default ldconfig locations to the one in nixpkgs.
+
+    substituteInPlace internal/config/config.go \
+      --replace '/usr/bin/nvidia-container-runtime-hook' "$out/bin/nvidia-container-runtime-hook" \
+      --replace '/sbin/ldconfig' '${lib.getBin glibc}/sbin/ldconfig'
+
+    substituteInPlace internal/config/config_test.go \
+      --replace '/sbin/ldconfig' '${lib.getBin glibc}/sbin/ldconfig'
+
+    substituteInPlace tools/container/toolkit/toolkit.go \
+      --replace '/sbin/ldconfig' '${lib.getBin glibc}/sbin/ldconfig'
+
+    substituteInPlace cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go \
+      --replace '/sbin/ldconfig' '${lib.getBin glibc}/sbin/ldconfig'
+  '';
+
+  # Based on upstream's Makefile:
+  # https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/Makefile#L64
+  ldflags = [
+    "-extldflags=-Wl,-z,lazy" # May be redundant, cf. `man ld`: "Lazy binding is the default".
+    "-s" # "disable symbol table"
+    "-w" # "disable DWARF generation"
+
+    # "-X name=value"
+    "-X"
+    "${cliVersionPackage}.version=${version}"
+  ];
+
+  nativeBuildInputs = [
+    cudaPackages.autoAddOpenGLRunpathHook
+    makeWrapper
+  ];
+
+  preConfigure = lib.optionalString (containerRuntimePath != null) ''
+    # Ensure the runc symlink isn't broken:
+    if ! readlink --quiet --canonicalize-existing "${isolatedContainerRuntimePath}/runc" ; then
+      echo "${isolatedContainerRuntimePath}/runc: broken symlink" >&2
+      exit 1
+    fi
+  '';
+
+  checkFlags =
+    let
+      skippedTests = [
+        # Disable tests executing nvidia-container-runtime command.
+        "TestGoodInput"
+        "TestDuplicateHook"
+      ];
+    in
+    [ "-skip" "${builtins.concatStringsSep "|" skippedTests}" ];
+
+  postInstall = lib.optionalString (containerRuntimePath != null) ''
+    mkdir -p $out/etc/nvidia-container-runtime
+
+    # nvidia-container-runtime invokes docker-runc or runc if that isn't
+    # available on PATH.
+    #
+    # Also set XDG_CONFIG_HOME if it isn't already to allow overriding
+    # configuration. This in turn allows users to have the nvidia container
+    # runtime enabled for any number of higher level runtimes like docker and
+    # podman, i.e., there's no need to have mutually exclusivity on what high
+    # level runtime can enable the nvidia runtime because each high level
+    # runtime has its own config.toml file.
+    wrapProgram $out/bin/nvidia-container-runtime \
+      --run "${warnIfXdgConfigHomeIsSet}" \
+      --prefix PATH : ${isolatedContainerRuntimePath}:${libnvidia-container}/bin \
+      --set-default XDG_CONFIG_HOME $out/etc
+
+    cp ${configToml} $out/etc/nvidia-container-runtime/config.toml
+
+    substituteInPlace $out/etc/nvidia-container-runtime/config.toml \
+      --subst-var-by glibcbin ${lib.getBin glibc}
+
+    # See: https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/packaging/debian/nvidia-container-toolkit.postinst#L12
+    ln -s $out/bin/nvidia-container-runtime-hook $out/bin/nvidia-container-toolkit
+
+    wrapProgram $out/bin/nvidia-container-toolkit \
+      --add-flags "-config ${placeholder "out"}/etc/nvidia-container-runtime/config.toml"
+  '';
+
+  meta = with lib; {
+    homepage = "https://gitlab.com/nvidia/container-toolkit/container-toolkit";
+    description = "NVIDIA Container Toolkit";
+    license = licenses.asl20;
+    platforms = platforms.linux;
+    maintainers = with maintainers; [ cpcloud ];
+  };
+}
diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix b/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix
new file mode 100644
index 0000000000000..c78b6b1c28349
--- /dev/null
+++ b/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix
@@ -0,0 +1,80 @@
+{
+  lib,
+  newScope,
+  docker,
+  libnvidia-container,
+  runc,
+  symlinkJoin,
+}:
+
+# Note this scope isn't recursed into, at the time of writing.
+lib.makeScope newScope (
+  self: {
+
+    # The config is only exposed as an attrset so that the user may reach the
+    # default values, for inspectability purposes.
+    dockerConfig = {
+      disable-require = false;
+      #swarm-resource = "DOCKER_RESOURCE_GPU"
+
+      nvidia-container-cli = {
+        #root = "/run/nvidia/driver";
+        #path = "/usr/bin/nvidia-container-cli";
+        environment = [ ];
+        #debug = "/var/log/nvidia-container-runtime-hook.log";
+        ldcache = "/tmp/ld.so.cache";
+        load-kmods = true;
+        #no-cgroups = false;
+        #user = "root:video";
+        ldconfig = "@@glibcbin@/bin/ldconfig";
+      };
+    };
+    nvidia-container-toolkit-docker = self.callPackage ./package.nix {
+      containerRuntimePath = "${docker}/libexec/docker/docker";
+      configTemplate = self.dockerConfig;
+    };
+
+    podmanConfig = {
+      disable-require = true;
+      #swarm-resource = "DOCKER_RESOURCE_GPU";
+
+      nvidia-container-cli = {
+        #root = "/run/nvidia/driver";
+        #path = "/usr/bin/nvidia-container-cli";
+        environment = [ ];
+        #debug = "/var/log/nvidia-container-runtime-hook.log";
+        ldcache = "/tmp/ld.so.cache";
+        load-kmods = true;
+        no-cgroups = true;
+        #user = "root:video";
+        ldconfig = "@@glibcbin@/bin/ldconfig";
+      };
+    };
+    nvidia-container-toolkit-podman = self.nvidia-container-toolkit-docker.override {
+      containerRuntimePath = lib.getExe runc;
+
+      configTemplate = self.podmanConfig;
+    };
+
+    nvidia-docker = symlinkJoin {
+      name = "nvidia-docker";
+      paths = [
+        libnvidia-container
+        self.nvidia-docker-unwrapped
+        self.nvidia-container-toolkit-docker
+      ];
+      inherit (self.nvidia-docker-unwrapped) meta;
+    };
+    nvidia-docker-unwrapped =
+      self.callPackage ./nvidia-docker.nix { };
+
+    nvidia-podman = symlinkJoin {
+      name = "nvidia-podman";
+      paths = [
+        libnvidia-container
+        self.nvidia-container-toolkit-podman
+      ];
+      inherit (self.nvidia-container-toolkit-podman) meta;
+    };
+  }
+)