From 8e800cedaf24f5ad9717463b809b0beef7677000 Mon Sep 17 00:00:00 2001
From: Connor Baker
Date: Tue, 7 Nov 2023 14:35:11 +0000
Subject: cudaPackages: move derivations to cuda-modules & support aarch64

cudaPackages.cuda_compat: ignore missing libs provided at runtime

cudaPackages.gpus: Jetson should never build by default

cudaPackages.flags: don't build Jetson capabilities by default

cudaPackages: re-introduce filter for pre-existing CUDA redist packages in overrides

cudaPackages: only recurseIntoAttrs for the latest of each major version

cudaPackages.nvccCompatabilities: use GCC 10 through CUDA 11.5 to avoid a GLIBC incompatability

cudaPackages.cutensor: acquire libcublas through cudatoolkit prior to 11.4

cudaPackages.cuda_compat: mark as broken on aarch64-linux if not targeting Jetson

cudaPackages.cutensor_1_4: fix build

cudaPackages: adjust use of autoPatchelfIgnoreMissingDeps

cudaPackages.cuda_nvprof: remove unecessary override to add addOpenGLRunpath

cudaPackages: use getExe' to avoid patchelf warning about missing meta.mainProgram

cudaPackages: fix evaluation with Nix 2.3

cudaPackages: fix platform detection for Jetson/non-Jetson aarch64-linux

python3Packages.tensorrt: mark as broken if required packages are missing

Note: evaluating the name of the derivation will fail if tensorrt is not present,
which is why we wrap the value in `lib.optionalString`.

cudaPackages.flags.getNixSystem: add guard based on jetsonTargets

cudaPackages.cudnn: use explicit path to patchelf

cudaPackages.tensorrt: use explicit path to patchelf
---
 .../cuda-modules/generic-builders/manifest.nix     | 249 +++++++++++++++++++++
 .../cuda-modules/generic-builders/multiplex.nix    | 131 +++++++++++
 2 files changed, 380 insertions(+)
 create mode 100644 pkgs/development/cuda-modules/generic-builders/manifest.nix
 create mode 100644 pkgs/development/cuda-modules/generic-builders/multiplex.nix

(limited to 'pkgs/development/cuda-modules/generic-builders')

diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix
new file mode 100644
index 0000000000000..01398d1165116
--- /dev/null
+++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix
@@ -0,0 +1,249 @@
+{
+  # General callPackage-supplied arguments
+  autoAddOpenGLRunpathHook,
+  autoPatchelfHook,
+  backendStdenv,
+  fetchurl,
+  lib,
+  lndir,
+  markForCudatoolkitRootHook,
+  flags,
+  stdenv,
+  hostPlatform,
+  # Builder-specific arguments
+  # Short package name (e.g., "cuda_cccl")
+  # pname : String
+  pname,
+  # Common name (e.g., "cutensor" or "cudnn") -- used in the URL.
+  # Also known as the Redistributable Name.
+  # redistName : String,
+  redistName,
+  # If libPath is non-null, it must be a subdirectory of `lib`.
+  # The contents of `libPath` will be moved to the root of `lib`.
+  libPath ? null,
+  # See ./modules/generic/manifests/redistrib/release.nix
+  redistribRelease,
+  # See ./modules/generic/manifests/feature/release.nix
+  featureRelease,
+}:
+let
+  inherit (lib)
+    attrsets
+    lists
+    meta
+    strings
+    trivial
+    licenses
+    teams
+    sourceTypes
+    ;
+
+  # Get the redist architectures for which package provides distributables.
+  # These are used by meta.platforms.
+  supportedRedistArchs = builtins.attrNames featureRelease;
+  redistArch = flags.getRedistArch hostPlatform.system;
+in
+backendStdenv.mkDerivation (
+  finalAttrs: {
+    # NOTE: Even though there's no actual buildPhase going on here, the derivations of the
+    # redistributables are sensitive to the compiler flags provided to stdenv. The patchelf package
+    # is sensitive to the compiler flags provided to stdenv, and we depend on it. As such, we are
+    # also sensitive to the compiler flags provided to stdenv.
+    inherit pname;
+    inherit (redistribRelease) version;
+
+    # Don't force serialization to string for structured attributes, like outputToPatterns
+    # and brokenConditions.
+    # Avoids "set cannot be coerced to string" errors.
+    __structuredAttrs = true;
+
+    # Keep better track of dependencies.
+    strictDeps = true;
+
+    # TODO(@connorbaker): Update `cuda-redist-find-features` to produce an attrset of boolean values for the
+    # outputs instead of `has*` attributes.
+    # NOTE: Outputs are evaluated jointly with meta, so in the case that this is an unsupported platform,
+    # we still need to provide a list of outputs.
+    outputs =
+      let
+        # Checks whether the redistributable provides an output.
+        hasOutput =
+          output:
+          attrsets.attrByPath
+            [
+              redistArch
+              "outputs"
+              output
+            ]
+            false
+            featureRelease;
+        # Order is important here so we use a list.
+        additionalOutputs = builtins.filter hasOutput [
+          "bin"
+          "lib"
+          "static"
+          "dev"
+          "doc"
+          "sample"
+          "python"
+        ];
+        # The out output is special -- it's the default output and we always include it.
+        outputs = ["out"] ++ additionalOutputs;
+      in
+      outputs;
+
+    # Traversed in the order of the outputs specified in outputs;
+    # entries are skipped if they don't exist in outputs.
+    outputToPatterns = {
+      bin = ["bin"];
+      lib = [
+        "lib"
+        "lib64"
+      ];
+      static = ["**/*.a"];
+      sample = ["samples"];
+      python = ["**/*.whl"];
+    };
+
+    # Useful for introspecting why something went wrong.
+    # Maps descriptions of why the derivation would be marked broken to
+    # booleans indicating whether that description is true.
+    brokenConditions = {};
+
+    src = fetchurl {
+      url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${
+        redistribRelease.${redistArch}.relative_path
+      }";
+      inherit (redistribRelease.${redistArch}) sha256;
+    };
+
+    # We do need some other phases, like configurePhase, so the multiple-output setup hook works.
+    dontBuild = true;
+
+    nativeBuildInputs = [
+      autoPatchelfHook
+      # This hook will make sure libcuda can be found
+      # in typically /lib/opengl-driver by adding that
+      # directory to the rpath of all ELF binaries.
+      # Check e.g. with `patchelf --print-rpath path/to/my/binary`
+      autoAddOpenGLRunpathHook
+      markForCudatoolkitRootHook
+    ];
+
+    buildInputs =
+      [
+        # autoPatchelfHook will search for a libstdc++ and we're giving it
+        # one that is compatible with the rest of nixpkgs, even when
+        # nvcc forces us to use an older gcc
+        # NB: We don't actually know if this is the right thing to do
+        stdenv.cc.cc.lib
+      ];
+
+    # Picked up by autoPatchelf
+    # Needed e.g. for libnvrtc to locate (dlopen) libnvrtc-builtins
+    appendRunpaths = ["$ORIGIN"];
+
+    # NOTE: We don't need to check for dev or doc, because those outputs are handled by
+    # the multiple-outputs setup hook.
+    # NOTE: moveToOutput operates on all outputs:
+    # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L105-L107
+    installPhase =
+      let
+        mkMoveToOutputCommand =
+          output:
+          let
+            template = pattern: ''moveToOutput "${pattern}" "${"$" + output}"'';
+            patterns = finalAttrs.outputToPatterns.${output} or [];
+          in
+          strings.concatMapStringsSep "\n" template patterns;
+      in
+      # Pre-install hook
+      ''
+        runHook preInstall
+      ''
+      # Handle the existence of libPath (relative to `lib`), which requires us to re-arrange the lib directory
+      + strings.optionalString (libPath != null) ''
+        if [[ ! -d "lib/${libPath}" ]] ; then
+          echo "${finalAttrs.pname}: lib/${libPath} does not exist, only found:" >&2
+          find "$(dirname "lib/${libPath}")"/ -maxdepth 1 >&2 || true
+          echo "This release might not support your CUDA version" >&2
+          exit 1
+        fi
+        mv "lib/${libPath}" lib_new
+        rm -r lib
+        mv lib_new lib
+      ''
+      + ''
+        mkdir -p "$out"
+        mv * "$out"
+        ${strings.concatMapStringsSep "\n" mkMoveToOutputCommand (builtins.tail finalAttrs.outputs)}
+        runHook postInstall
+      '';
+
+    # libcuda needs to be resolved during runtime
+    # NOTE: Due to the use of __structuredAttrs, we can't use a list for autoPatchelfIgnoreMissingDeps, since it
+    # will take only the first value. Instead, we produce a string with the values separated by spaces.
+    # Using the `env` attribute ensures that the value is representable as one of the primitives allowed by
+    # bash's environment variables.
+    env.autoPatchelfIgnoreMissingDeps = "libcuda.so libcuda.so.*";
+
+    # The out output leverages the same functionality which backs the `symlinkJoin` function in
+    # Nixpkgs:
+    # https://github.com/NixOS/nixpkgs/blob/d8b2a92df48f9b08d68b0132ce7adfbdbc1fbfac/pkgs/build-support/trivial-builders/default.nix#L510
+    #
+    # That should allow us to emulate "fat" default outputs without having to actually create them.
+    #
+    # It is important that this run after the autoPatchelfHook, otherwise the symlinks in out will reference libraries in lib, creating a circular dependency.
+    postPhases = ["postPatchelf"];
+
+    # For each output, create a symlink to it in the out output.
+    # NOTE: We must recreate the out output here, because the setup hook will have deleted it
+    # if it was empty.
+    postPatchelf =
+      let
+        # Note the use of `"$" + output` -- the variable must be expanded by bash, not interpolated by Nix.
+        mkJoinWithOutOutputCommand = output: ''${meta.getExe lndir} "${"$" + output}" "$out"'';
+      in
+      ''
+        mkdir -p "$out"
+        ${strings.concatMapStringsSep "\n" mkJoinWithOutOutputCommand (builtins.tail finalAttrs.outputs)}
+      '';
+
+    # Make the CUDA-patched stdenv available
+    passthru.stdenv = backendStdenv;
+
+    # Setting propagatedBuildOutputs to false will prevent outputs known to the multiple-outputs
+    # setup hook from depending on `out` by default.
+    # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L196
+    # Indeed, we want to do the opposite -- fat "out" outputs that contain all the other outputs.
+    propagatedBuildOutputs = false;
+
+    # By default, if the dev output exists it just uses that.
+    # However, because we disabled propagatedBuildOutputs, dev doesn't contain libraries or
+    # anything of the sort. To remedy this, we set outputSpecified to true, and use
+    # outputsToInstall, which tells Nix which outputs to use when the package name is used
+    # unqualified (that is, without an explicit output).
+    outputSpecified = true;
+
+    meta = {
+      description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
+      sourceProvenance = [sourceTypes.binaryNativeCode];
+      platforms =
+        lists.concatMap
+          (
+            redistArch:
+            let
+              nixSystem = builtins.tryEval (flags.getNixSystem redistArch);
+            in
+            if nixSystem.success then [nixSystem.value] else []
+          )
+          supportedRedistArchs;
+      broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions);
+      license = licenses.unfree;
+      maintainers = teams.cuda.members;
+      # Force the use of the default, fat output by default (even though `dev` exists, which
+      # causes Nix to prefer that output over the others if outputSpecified isn't set).
+      outputsToInstall = ["out"];
+    };
+  }
+)
diff --git a/pkgs/development/cuda-modules/generic-builders/multiplex.nix b/pkgs/development/cuda-modules/generic-builders/multiplex.nix
new file mode 100644
index 0000000000000..b8053094bcc82
--- /dev/null
+++ b/pkgs/development/cuda-modules/generic-builders/multiplex.nix
@@ -0,0 +1,131 @@
+{
+  # callPackage-provided arguments
+  lib,
+  cudaVersion,
+  flags,
+  hostPlatform,
+  # Expected to be passed by the caller
+  mkVersionedPackageName,
+  # pname :: String
+  pname,
+  # releasesModule :: Path
+  # A path to a module which provides a `releases` attribute
+  releasesModule,
+  # shimsFn :: {package, redistArch} -> AttrSet
+  # Evaluated with callPackage; the result must provide the `redistribRelease` and
+  # `featureRelease` attributes, which are passed on to ./manifest.nix.
+  # The redistribRelease is only used in ./manifest.nix for the package version
+  # and the package description (which NVIDIA's manifest calls the "name").
+  # It's also used for fetching the source, but we override that since we can't
+  # re-use that portion of the functionality (different URLs, etc.).
+  # The featureRelease is used to populate meta.platforms (by way of looking at the attribute names)
+  # and to determine the outputs of the package.
+  shimsFn ? ({package, redistArch}: throw "shimsFn must be provided"),
+  # fixupFn :: Path
+  # A path (or nix expression) to be evaluated with callPackage and then
+  # provided to the package's overrideAttrs function.
+  # It must accept at least the following arguments:
+  # - final
+  # - cudaVersion
+  # - mkVersionedPackageName
+  # - package
+  fixupFn ? (
+    {
+      final,
+      cudaVersion,
+      mkVersionedPackageName,
+      package,
+      ...
+    }:
+    throw "fixupFn must be provided"
+  ),
+}:
+let
+  inherit (lib)
+    attrsets
+    lists
+    modules
+    strings
+    ;
+
+  evaluatedModules = modules.evalModules {
+    modules = [
+      ../modules
+      releasesModule
+    ];
+  };
+
+  # NOTE: Important types:
+  # - Releases: ../modules/${pname}/releases/releases.nix
+  # - Package: ../modules/${pname}/releases/package.nix
+
+  # All releases across all platforms
+  # See ../modules/${pname}/releases/releases.nix
+  allReleases = evaluatedModules.config.${pname}.releases;
+
+  # Compute versioned attribute name to be used in this package set
+  # Patch version changes should not break the build, so we only use major and minor
+  # computeName :: Package -> String
+  computeName = {version, ...}: mkVersionedPackageName pname version;
+
+  # Check whether a package supports our CUDA version
+  # isSupported :: Package -> Bool
+  isSupported =
+    package:
+    strings.versionAtLeast cudaVersion package.minCudaVersion
+    && strings.versionAtLeast package.maxCudaVersion cudaVersion;
+
+  # Get all of the packages for our given platform.
+  redistArch = flags.getRedistArch hostPlatform.system;
+
+  # All the supported packages we can build for our platform.
+  # supportedPackages :: List (AttrSet Packages)
+  supportedPackages = builtins.filter isSupported (allReleases.${redistArch} or []);
+
+  # newestToOldestSupportedPackage :: List (AttrSet Packages)
+  newestToOldestSupportedPackage = lists.reverseList supportedPackages;
+
+  nameOfNewest = computeName (builtins.head newestToOldestSupportedPackage);
+
+  # A function which takes the `final` overlay and the `package` being built and returns
+  # a function to be consumed via `overrideAttrs`.
+  overrideAttrsFixupFn =
+    final: package:
+    final.callPackage fixupFn {
+      inherit
+        final
+        cudaVersion
+        mkVersionedPackageName
+        package
+        ;
+    };
+
+  extension =
+    final: _:
+    let
+      # Builds our package into derivation and wraps it in a nameValuePair, where the name is the versioned name
+      # of the package.
+      buildPackage =
+        package:
+        let
+          shims = final.callPackage shimsFn {inherit package redistArch;};
+          name = computeName package;
+          drv = final.callPackage ./manifest.nix {
+            inherit pname;
+            redistName = pname;
+            inherit (shims) redistribRelease featureRelease;
+          };
+          fixedDrv = drv.overrideAttrs (overrideAttrsFixupFn final package);
+        in
+        attrsets.nameValuePair name fixedDrv;
+
+      # versionedDerivations :: AttrSet Derivation
+      versionedDerivations = builtins.listToAttrs (lists.map buildPackage newestToOldestSupportedPackage);
+
+      defaultDerivation = attrsets.optionalAttrs (versionedDerivations != {}) {
+        ${pname} = versionedDerivations.${nameOfNewest};
+      };
+    in
+    versionedDerivations // defaultDerivation;
+in
+extension
-- 
cgit 1.4.1