diff options
author | Rafael Fernández López <ereslibre@ereslibre.es> | 2024-02-17 18:16:22 +0100 |
---|---|---|
committer | Rafael Fernández López <ereslibre@ereslibre.es> | 2024-02-21 22:17:11 +0100 |
commit | 6ac6aab1993eeb0d5da48054ff3d89b666c241e5 (patch) | |
tree | 78a988c4b4161c64d6aeb0a870505ddf71670d3a /pkgs/by-name/nv | |
parent | 8ba61ebb8acb51bf3d249a10a4671c5aaec7d91d (diff) |
nvidia-container-toolkit: move to by-name structure
Diffstat (limited to 'pkgs/by-name/nv')
4 files changed, 350 insertions, 0 deletions
diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/0001-Add-dlopen-discoverer.patch b/pkgs/by-name/nv/nvidia-container-toolkit/0001-Add-dlopen-discoverer.patch new file mode 100644 index 0000000000000..ddc7d34ac7c1b --- /dev/null +++ b/pkgs/by-name/nv/nvidia-container-toolkit/0001-Add-dlopen-discoverer.patch @@ -0,0 +1,90 @@ +From e4449f06a8989ff22947309151855b388c311aed Mon Sep 17 00:00:00 2001 +From: Jared Baur <jaredbaur@fastmail.com> +Date: Mon, 22 Jan 2024 20:42:48 -0800 +Subject: [PATCH] Add dlopen discoverer + +--- + internal/lookup/dlopen.go | 57 ++++++++++++++++++++++++++++++++++++++ + internal/lookup/library.go | 3 ++ + 2 files changed, 60 insertions(+) + create mode 100644 internal/lookup/dlopen.go + +diff --git a/internal/lookup/dlopen.go b/internal/lookup/dlopen.go +new file mode 100644 +index 00000000..7cd84522 +--- /dev/null ++++ b/internal/lookup/dlopen.go +@@ -0,0 +1,57 @@ ++package lookup ++ ++// #cgo LDFLAGS: -ldl ++// #define _GNU_SOURCE ++// #include <dlfcn.h> ++// #include <stdlib.h> ++import "C" ++ ++import ( ++ "fmt" ++ "path/filepath" ++ "unsafe" ++) ++ ++// dlopenLocator can be used to locate libraries given a system's dynamic ++// linker. ++type dlopenLocator struct { ++ file ++} ++ ++// NewDlopenLocator creates a locator that can be used for locating libraries ++// through the dlopen mechanism. ++func NewDlopenLocator(opts ...Option) Locator { ++ f := newFileLocator(opts...) ++ d := dlopenLocator{file: *f} ++ return &d ++} ++ ++// Locate finds the specified pattern if the system's dynamic linker can find ++// it via dlopen. Note that patterns with wildcard patterns will likely not be ++// found as it is uncommon for libraries to have wildcard patterns in their ++// file name. 
++// On success the returned slice holds exactly one element: the origin
++// directory reported by dlinfo joined with pattern. An error is returned if
++// either dlopen or dlinfo fails.
++func (d dlopenLocator) Locate(pattern string) ([]string, error) {
++	// dlinfo(RTLD_DI_ORIGIN) copies the origin directory into a buffer
++	// supplied by the caller and is not told the buffer's size, so the buffer
++	// must be large enough for any path. 4096 is PATH_MAX on Linux.
++	const originBufSize = 4096
++
++	libname := C.CString(pattern)
++	defer C.free(unsafe.Pointer(libname))
++
++	d.logger.Debugf("Calling dlopen for %s", pattern)
++
++	handle := C.dlopen(libname, C.RTLD_LAZY)
++	if handle == nil {
++		return nil, fmt.Errorf("dlopen %s failed", pattern)
++	}
++	defer C.dlclose(handle)
++
++	// A real buffer is required here: C.CString("") would only provide a
++	// single byte for dlinfo to write into. cgo's C.malloc never returns nil,
++	// so no nil check is needed; the buffer is released on every return path.
++	libParentPath := (*C.char)(C.malloc(C.size_t(originBufSize)))
++	defer C.free(unsafe.Pointer(libParentPath))
++
++	d.logger.Debugf("Calling dlinfo on handle for %s", pattern)
++	ret := C.dlinfo(handle, C.RTLD_DI_ORIGIN, unsafe.Pointer(libParentPath))
++	if ret == -1 {
++		return nil, fmt.Errorf("dlinfo on handle for %s failed", pattern)
++	}
++
++	libAbsolutePath := filepath.Join(C.GoString(libParentPath), pattern)
++	d.logger.Debugf("Found library for %s at %s", pattern, libAbsolutePath)
++
++	return []string{libAbsolutePath}, nil
++}
+diff --git a/internal/lookup/library.go b/internal/lookup/library.go
+index 7f5cf7c8..916edde2 100644
+--- a/internal/lookup/library.go
++++ b/internal/lookup/library.go
+@@ -61,7 +61,10 @@ func NewLibraryLocator(opts ...Option) Locator {
+ 	// We construct a symlink locator for expected library locations.
+ 	symlinkLocator := NewSymlinkLocator(opts...)
+ 
++	dlopenLocator := NewDlopenLocator(opts...)
++ + l := First( ++ dlopenLocator, + symlinkLocator, + newLdcacheLocator(opts...), + ) +-- diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/nvidia-docker.nix b/pkgs/by-name/nv/nvidia-container-toolkit/nvidia-docker.nix new file mode 100644 index 0000000000000..07dfc3fdb6dc3 --- /dev/null +++ b/pkgs/by-name/nv/nvidia-container-toolkit/nvidia-docker.nix @@ -0,0 +1,32 @@ +{ stdenv, lib, fetchFromGitHub, callPackage }: +stdenv.mkDerivation rec { + pname = "nvidia-docker"; + version = "2.5.0"; + + src = fetchFromGitHub { + owner = "NVIDIA"; + repo = pname; + rev = "v${version}"; + sha256 = "1n1k7fnimky67s12p2ycaq9mgk245fchq62vgd7bl3bzfcbg0z4h"; + }; + + buildPhase = '' + mkdir bin + + cp nvidia-docker bin + substituteInPlace bin/nvidia-docker --subst-var-by VERSION ${version} + ''; + + installPhase = '' + mkdir -p $out/bin + cp bin/nvidia-docker $out/bin + ''; + + meta = with lib; { + homepage = "https://github.com/NVIDIA/nvidia-docker"; + description = "NVIDIA container runtime for Docker"; + license = licenses.bsd3; + platforms = platforms.linux; + maintainers = with maintainers; [ cpcloud ]; + }; +} diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/package.nix b/pkgs/by-name/nv/nvidia-container-toolkit/package.nix new file mode 100644 index 0000000000000..a584be35e7b20 --- /dev/null +++ b/pkgs/by-name/nv/nvidia-container-toolkit/package.nix @@ -0,0 +1,148 @@ +{ lib +, glibc +, fetchFromGitLab +, makeWrapper +, buildGoModule +, linkFarm +, writeShellScript +, formats +, containerRuntimePath ? null +, configTemplate ? null +, configTemplatePath ? 
null +, libnvidia-container +, cudaPackages +}: + +assert configTemplate != null -> (lib.isAttrs configTemplate && configTemplatePath == null); +assert configTemplatePath != null -> (lib.isStringLike configTemplatePath && configTemplate == null); + +let + isolatedContainerRuntimePath = linkFarm "isolated_container_runtime_path" [ + { + name = "runc"; + path = containerRuntimePath; + } + ]; + warnIfXdgConfigHomeIsSet = writeShellScript "warn_if_xdg_config_home_is_set" '' + set -eo pipefail + + if [ -n "$XDG_CONFIG_HOME" ]; then + echo >&2 "$(tput setaf 3)warning: \$XDG_CONFIG_HOME=$XDG_CONFIG_HOME$(tput sgr 0)" + fi + ''; + + configToml = if configTemplatePath != null then configTemplatePath else (formats.toml { }).generate "config.toml" configTemplate; + + # From https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/Makefile#L54 + cliVersionPackage = "github.com/NVIDIA/nvidia-container-toolkit/internal/info"; +in +buildGoModule rec { + pname = "container-toolkit/container-toolkit"; + version = "1.15.0-rc.3"; + + src = fetchFromGitLab { + owner = "nvidia"; + repo = pname; + rev = "v${version}"; + hash = "sha256-IH2OjaLbcKSGG44aggolAOuJkjk+GaXnnTbrXfZ0lVo="; + + }; + + vendorHash = null; + + patches = [ + # This patch causes library lookups to first attempt loading via dlopen + # before falling back to the regular symlink location and ldcache location. + ./0001-Add-dlopen-discoverer.patch + ]; + + postPatch = '' + # Replace the default hookDefaultFilePath to the $out path and override + # default ldconfig locations to the one in nixpkgs. 
+ + substituteInPlace internal/config/config.go \ + --replace '/usr/bin/nvidia-container-runtime-hook' "$out/bin/nvidia-container-runtime-hook" \ + --replace '/sbin/ldconfig' '${lib.getBin glibc}/sbin/ldconfig' + + substituteInPlace internal/config/config_test.go \ + --replace '/sbin/ldconfig' '${lib.getBin glibc}/sbin/ldconfig' + + substituteInPlace tools/container/toolkit/toolkit.go \ + --replace '/sbin/ldconfig' '${lib.getBin glibc}/sbin/ldconfig' + + substituteInPlace cmd/nvidia-ctk/hook/update-ldcache/update-ldcache.go \ + --replace '/sbin/ldconfig' '${lib.getBin glibc}/sbin/ldconfig' + ''; + + # Based on upstream's Makefile: + # https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/Makefile#L64 + ldflags = [ + "-extldflags=-Wl,-z,lazy" # May be redundant, cf. `man ld`: "Lazy binding is the default". + "-s" # "disable symbol table" + "-w" # "disable DWARF generation" + + # "-X name=value" + "-X" + "${cliVersionPackage}.version=${version}" + ]; + + nativeBuildInputs = [ + cudaPackages.autoAddOpenGLRunpathHook + makeWrapper + ]; + + preConfigure = lib.optionalString (containerRuntimePath != null) '' + # Ensure the runc symlink isn't broken: + if ! readlink --quiet --canonicalize-existing "${isolatedContainerRuntimePath}/runc" ; then + echo "${isolatedContainerRuntimePath}/runc: broken symlink" >&2 + exit 1 + fi + ''; + + checkFlags = + let + skippedTests = [ + # Disable tests executing nvidia-container-runtime command. + "TestGoodInput" + "TestDuplicateHook" + ]; + in + [ "-skip" "${builtins.concatStringsSep "|" skippedTests}" ]; + + postInstall = lib.optionalString (containerRuntimePath != null) '' + mkdir -p $out/etc/nvidia-container-runtime + + # nvidia-container-runtime invokes docker-runc or runc if that isn't + # available on PATH. + # + # Also set XDG_CONFIG_HOME if it isn't already to allow overriding + # configuration. 
This in turn allows users to have the nvidia container + # runtime enabled for any number of higher level runtimes like docker and + # podman, i.e., there's no need to have mutual exclusivity on what high + # level runtime can enable the nvidia runtime because each high level + # runtime has its own config.toml file. + wrapProgram $out/bin/nvidia-container-runtime \ + --run "${warnIfXdgConfigHomeIsSet}" \ + --prefix PATH : ${isolatedContainerRuntimePath}:${libnvidia-container}/bin \ + --set-default XDG_CONFIG_HOME $out/etc + + cp ${configToml} $out/etc/nvidia-container-runtime/config.toml + + substituteInPlace $out/etc/nvidia-container-runtime/config.toml \ + --subst-var-by glibcbin ${lib.getBin glibc} + + # See: https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/03cbf9c6cd26c75afef8a2dd68e0306aace80401/packaging/debian/nvidia-container-toolkit.postinst#L12 + ln -s $out/bin/nvidia-container-runtime-hook $out/bin/nvidia-container-toolkit + + wrapProgram $out/bin/nvidia-container-toolkit \ + --add-flags "-config ${placeholder "out"}/etc/nvidia-container-runtime/config.toml" + ''; + + meta = with lib; { + homepage = "https://gitlab.com/nvidia/container-toolkit/container-toolkit"; + description = "NVIDIA Container Toolkit"; + license = licenses.asl20; + platforms = platforms.linux; + maintainers = with maintainers; [ cpcloud ]; + }; +} diff --git a/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix b/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix new file mode 100644 index 0000000000000..c78b6b1c28349 --- /dev/null +++ b/pkgs/by-name/nv/nvidia-container-toolkit/packages.nix @@ -0,0 +1,80 @@ +{ + lib, + newScope, + docker, + libnvidia-container, + runc, + symlinkJoin, +}: + +# Note this scope isn't recursed into, at the time of writing. +lib.makeScope newScope ( + self: { + + # The config is only exposed as an attrset so that the user may reach the + # default values, for inspectability purposes. 
+ dockerConfig = { + disable-require = false; + #swarm-resource = "DOCKER_RESOURCE_GPU" + + nvidia-container-cli = { + #root = "/run/nvidia/driver"; + #path = "/usr/bin/nvidia-container-cli"; + environment = [ ]; + #debug = "/var/log/nvidia-container-runtime-hook.log"; + ldcache = "/tmp/ld.so.cache"; + load-kmods = true; + #no-cgroups = false; + #user = "root:video"; + ldconfig = "@@glibcbin@/bin/ldconfig"; + }; + }; + nvidia-container-toolkit-docker = self.callPackage ./package.nix { + containerRuntimePath = "${docker}/libexec/docker/docker"; + configTemplate = self.dockerConfig; + }; + + podmanConfig = { + disable-require = true; + #swarm-resource = "DOCKER_RESOURCE_GPU"; + + nvidia-container-cli = { + #root = "/run/nvidia/driver"; + #path = "/usr/bin/nvidia-container-cli"; + environment = [ ]; + #debug = "/var/log/nvidia-container-runtime-hook.log"; + ldcache = "/tmp/ld.so.cache"; + load-kmods = true; + no-cgroups = true; + #user = "root:video"; + ldconfig = "@@glibcbin@/bin/ldconfig"; + }; + }; + nvidia-container-toolkit-podman = self.nvidia-container-toolkit-docker.override { + containerRuntimePath = lib.getExe runc; + + configTemplate = self.podmanConfig; + }; + + nvidia-docker = symlinkJoin { + name = "nvidia-docker"; + paths = [ + libnvidia-container + self.nvidia-docker-unwrapped + self.nvidia-container-toolkit-docker + ]; + inherit (self.nvidia-docker-unwrapped) meta; + }; + nvidia-docker-unwrapped = + self.callPackage ./nvidia-docker.nix { }; + + nvidia-podman = symlinkJoin { + name = "nvidia-podman"; + paths = [ + libnvidia-container + self.nvidia-container-toolkit-podman + ]; + inherit (self.nvidia-container-toolkit-podman) meta; + }; + } +) |