diff options
author | Someone <sergei.kozlukov@aalto.fi> | 2024-04-23 19:55:41 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-23 19:55:41 +0000 |
commit | 7035968845f28df0f28992b2f4362a78cc8d0b12 (patch) | |
tree | b882ffafc9b0d6b7ca38a4af4e0879dded107096 /nixos/modules/services | |
parent | 83cbc65da646c2890eed07e74d241bd7ded4708b (diff) | |
parent | de3ce5ffa78eace4fadcd71b51026a37b56e609a (diff) |
Merge pull request #290979 from ereslibre/cdi-add-nvidia-docker-1-directories
CDI: Add `mount-nvidia-binaries` and `mount-nvidia-docker-1-directories` options
Diffstat (limited to 'nixos/modules/services')
4 files changed, 156 insertions, 100 deletions
diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix deleted file mode 100644 index 1aaa2d07b9bde..0000000000000 --- a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/cdi-generate.nix +++ /dev/null @@ -1,60 +0,0 @@ -{ - addDriverRunpath, - glibc, - jq, - lib, - nvidia-container-toolkit, - nvidia-driver, - runtimeShell, - writeScriptBin, -}: -let - mountOptions = { options = ["ro" "nosuid" "nodev" "bind"]; }; - mounts = [ - # FIXME: Making /usr mounts optional - { hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control"; - containerPath = "/usr/bin/nvidia-cuda-mps-control"; } - { hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server"; - containerPath = "/usr/bin/nvidia-cuda-mps-server"; } - { hostPath = lib.getExe' nvidia-driver "nvidia-debugdump"; - containerPath = "/usr/bin/nvidia-debugdump"; } - { hostPath = lib.getExe' nvidia-driver "nvidia-powerd"; - containerPath = "/usr/bin/nvidia-powerd"; } - { hostPath = lib.getExe' nvidia-driver "nvidia-smi"; - containerPath = "/usr/bin/nvidia-smi"; } - { hostPath = lib.getExe' nvidia-container-toolkit "nvidia-ctk"; - containerPath = "/usr/bin/nvidia-ctk"; } - { hostPath = "${lib.getLib glibc}/lib"; - containerPath = "${lib.getLib glibc}/lib"; } - - # FIXME: use closureinfo - { - hostPath = addDriverRunpath.driverLink; - containerPath = addDriverRunpath.driverLink; - } - { hostPath = "${lib.getLib glibc}/lib"; - containerPath = "${lib.getLib glibc}/lib"; } - { hostPath = "${lib.getLib glibc}/lib64"; - containerPath = "${lib.getLib glibc}/lib64"; } - ]; - jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +"; - mountsToJq = lib.concatMap - (mount: - ["${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mount // mountOptions)}'"]) - mounts; -in -writeScriptBin "nvidia-cdi-generator" -'' -#! ${runtimeShell} - -function cdiGenerate { - ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} cdi generate \ - --format json \ - --ldconfig-path ${lib.getExe' glibc "ldconfig"} \ - --library-search-path ${lib.getLib nvidia-driver}/lib \ - --nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} -} - -cdiGenerate | \ - ${lib.concatStringsSep " | " mountsToJq} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json -'' diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix b/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix deleted file mode 100644 index 5aa3c72ee0a06..0000000000000 --- a/nixos/modules/services/hardware/nvidia-container-toolkit-cdi-generator/default.nix +++ /dev/null @@ -1,40 +0,0 @@ -{ config, lib, pkgs, ... }: - -{ - - options = { - - hardware.nvidia-container-toolkit-cdi-generator.enable = lib.mkOption { - default = false; - internal = true; - visible = false; - type = lib.types.bool; - description = '' - Enable dynamic CDI configuration for NVidia devices by running - nvidia-container-toolkit on boot. - ''; - }; - - }; - - config = { - - systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit-cdi-generator.enable { - description = "Container Device Interface (CDI) for Nvidia generator"; - wantedBy = [ "multi-user.target" ]; - after = [ "systemd-udev-settle.service" ]; - serviceConfig = { - RuntimeDirectory = "cdi"; - RemainAfterExit = true; - ExecStart = - let - script = pkgs.callPackage ./cdi-generate.nix { nvidia-driver = config.hardware.nvidia.package; }; - in - lib.getExe script; - Type = "oneshot"; - }; - }; - - }; - -} diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix b/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix new file mode 100644 index 0000000000000..ca769cc44e5c9 --- /dev/null +++ b/nixos/modules/services/hardware/nvidia-container-toolkit/cdi-generate.nix @@ -0,0 +1,35 @@ +{ + glibc, + jq, + lib, + mounts, + nvidia-container-toolkit, + nvidia-driver, + runtimeShell, + writeScriptBin, +}: let + mkMount = {hostPath, containerPath, mountOptions}: { + inherit hostPath containerPath; + options = mountOptions; + }; + jqAddMountExpression = ".containerEdits.mounts[.containerEdits.mounts | length] |= . +"; + allJqMounts = lib.concatMap + (mount: + ["${lib.getExe jq} '${jqAddMountExpression} ${builtins.toJSON (mkMount mount)}'"]) + mounts; +in +writeScriptBin "nvidia-cdi-generator" +'' +#! ${runtimeShell} + +function cdiGenerate { + ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} cdi generate \ + --format json \ + --ldconfig-path ${lib.getExe' glibc "ldconfig"} \ + --library-search-path ${lib.getLib nvidia-driver}/lib \ + --nvidia-ctk-path ${lib.getExe' nvidia-container-toolkit "nvidia-ctk"} +} + +cdiGenerate | \ + ${lib.concatStringsSep " | " allJqMounts} > $RUNTIME_DIRECTORY/nvidia-container-toolkit.json +'' diff --git a/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix b/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix new file mode 100644 index 0000000000000..7b4973d3c6b0c --- /dev/null +++ b/nixos/modules/services/hardware/nvidia-container-toolkit/default.nix @@ -0,0 +1,121 @@ +{ config, lib, pkgs, ... }: + +{ + imports = [ + (lib.mkRenamedOptionModule + [ "virtualisation" "containers" "cdi" "dynamic" "nvidia" "enable" ] + [ "hardware" "nvidia-container-toolkit" "enable" ]) + ]; + + options = let + mountType = { + options = { + hostPath = lib.mkOption { + type = lib.types.str; + description = "Host path."; + }; + containerPath = lib.mkOption { + type = lib.types.str; + description = "Container path."; + }; + mountOptions = lib.mkOption { + default = [ "ro" "nosuid" "nodev" "bind" ]; + type = lib.types.listOf lib.types.str; + description = "Mount options."; + }; + }; + }; + in { + + hardware.nvidia-container-toolkit = { + enable = lib.mkOption { + default = false; + type = lib.types.bool; + description = '' + Enable dynamic CDI configuration for NVidia devices by running + nvidia-container-toolkit on boot. + ''; + }; + + mounts = lib.mkOption { + type = lib.types.listOf (lib.types.submodule mountType); + default = []; + description = "Mounts to be added to every container under the Nvidia CDI profile."; + }; + + mount-nvidia-executables = lib.mkOption { + default = true; + type = lib.types.bool; + description = '' + Mount executables nvidia-smi, nvidia-cuda-mps-control, nvidia-cuda-mps-server, + nvidia-debugdump, nvidia-powerd and nvidia-ctk on containers. + ''; + }; + + mount-nvidia-docker-1-directories = lib.mkOption { + default = true; + type = lib.types.bool; + description = '' + Mount nvidia-docker-1 directories on containers: /usr/local/nvidia/lib and + /usr/local/nvidia/lib64. + ''; + }; + }; + + }; + + config = { + + hardware.nvidia-container-toolkit.mounts = let + nvidia-driver = config.hardware.nvidia.package; + in (lib.mkMerge [ + [{ hostPath = pkgs.addDriverRunpath.driverLink; + containerPath = pkgs.addDriverRunpath.driverLink; } + { hostPath = "${lib.getLib pkgs.glibc}/lib"; + containerPath = "${lib.getLib pkgs.glibc}/lib"; } + { hostPath = "${lib.getLib pkgs.glibc}/lib64"; + containerPath = "${lib.getLib pkgs.glibc}/lib64"; }] + (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables + [{ hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control"; + containerPath = "/usr/bin/nvidia-cuda-mps-control"; } + { hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server"; + containerPath = "/usr/bin/nvidia-cuda-mps-server"; } + { hostPath = lib.getExe' nvidia-driver "nvidia-debugdump"; + containerPath = "/usr/bin/nvidia-debugdump"; } + { hostPath = lib.getExe' nvidia-driver "nvidia-powerd"; + containerPath = "/usr/bin/nvidia-powerd"; } + { hostPath = lib.getExe' nvidia-driver "nvidia-smi"; + containerPath = "/usr/bin/nvidia-smi"; }]) + # nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64} + # e.g. + # - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44 + # - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173 + (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories + [{ hostPath = "${lib.getLib nvidia-driver}/lib"; + containerPath = "/usr/local/nvidia/lib"; } + { hostPath = "${lib.getLib nvidia-driver}/lib"; + containerPath = "/usr/local/nvidia/lib64"; }]) + ]); + + systemd.services.nvidia-container-toolkit-cdi-generator = lib.mkIf config.hardware.nvidia-container-toolkit.enable { + description = "Container Device Interface (CDI) for Nvidia generator"; + wantedBy = [ "multi-user.target" ]; + after = [ "systemd-udev-settle.service" ]; + serviceConfig = { + RuntimeDirectory = "cdi"; + RemainAfterExit = true; + ExecStart = + let + script = pkgs.callPackage ./cdi-generate.nix { + inherit (config.hardware.nvidia-container-toolkit) mounts; + nvidia-driver = config.hardware.nvidia.package; + }; + in + lib.getExe script; + Type = "oneshot"; + }; + }; + + }; + +} |