From f1ddae47e3841560f97a1c3e17f1f42b54ec2d07 Mon Sep 17 00:00:00 2001
From: Someone Serge <sergei.kozlukov@aalto.fi>
Date: Wed, 3 Apr 2024 08:41:20 +0000
Subject: cudaPackages: make getOutput work again

(cherry picked from commit a2954dea377e749d9cff5f71f3815330a5907ee9)
---
 pkgs/development/cuda-modules/cuda/overrides.nix   | 19 +++++-----
 .../cuda-modules/generic-builders/manifest.nix     | 41 ++++++----------------
 2 files changed, 21 insertions(+), 39 deletions(-)

diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix
index 5d23d8f7f2a1a..22fea42febdcd 100644
--- a/pkgs/development/cuda-modules/cuda/overrides.nix
+++ b/pkgs/development/cuda-modules/cuda/overrides.nix
@@ -166,13 +166,17 @@ filterAndCreateOverrides {
     };
 
   cuda_nvcc =
-    {
-      backendStdenv,
-      cuda_cudart,
-      lib,
-      setupCudaHook,
-    }:
+    { backendStdenv, setupCudaHook }:
     prevAttrs: {
+      # Merge "bin" and "dev" into "out" to avoid circular references
+      outputs = builtins.filter (
+        x:
+        !(builtins.elem x [
+          "dev"
+          "bin"
+        ])
+      ) prevAttrs.outputs;
+
       # Patch the nvcc.profile.
       # Syntax:
       # - `=` for assignment,
@@ -230,8 +234,7 @@ filterAndCreateOverrides {
       };
     };
 
-  cuda_nvprof =
-    { cuda_cupti }: prevAttrs: { buildInputs = prevAttrs.buildInputs ++ [ cuda_cupti.lib ]; };
+  cuda_nvprof = { cuda_cupti }: prevAttrs: { buildInputs = prevAttrs.buildInputs ++ [ cuda_cupti ]; };
 
   cuda_demo_suite =
     {
diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix
index 006abb456cdc5..ccf1bb980ca3d 100644
--- a/pkgs/development/cuda-modules/generic-builders/manifest.nix
+++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix
@@ -290,41 +290,23 @@ backendStdenv.mkDerivation (finalAttrs: {
     "libcuda.so.*"
   ];
 
-  # The out output leverages the same functionality which backs the `symlinkJoin` function in
-  # Nixpkgs:
-  # https://github.com/NixOS/nixpkgs/blob/d8b2a92df48f9b08d68b0132ce7adfbdbc1fbfac/pkgs/build-support/trivial-builders/default.nix#L510
-  #
-  # That should allow us to emulate "fat" default outputs without having to actually create them.
-  #
-  # It is important that this run after the autoPatchelfHook, otherwise the symlinks in out will reference libraries in lib, creating a circular dependency.
-  postPhases = [ "postPatchelf" ];
+  # _multioutPropagateDev() currently expects a space-separated string rather than an array
+  preFixup = ''
+    export propagatedBuildOutputs="''${propagatedBuildOutputs[@]}"
+  '';
+
+  # Propagate all outputs, including `static`
+  propagatedBuildOutputs = builtins.filter (x: x != "dev") finalAttrs.outputs;
 
-  # For each output, create a symlink to it in the out output.
-  # NOTE: We must recreate the out output here, because the setup hook will have deleted it if it was empty.
+  # Kept in case overrides assume postPhases have already been defined
+  postPhases = [ "postPatchelf" ];
   postPatchelf = ''
-    mkdir -p "$out"
-    for output in $(getAllOutputNames); do
-      if [[ "$output" != "out" ]]; then
-        ${meta.getExe lndir} "''${!output}" "$out"
-      fi
-    done
+    true
   '';
 
   # Make the CUDA-patched stdenv available
   passthru.stdenv = backendStdenv;
 
-  # Setting propagatedBuildInputs to false will prevent outputs known to the multiple-outputs
-  # from depending on `out` by default.
-  # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L196
-  # Indeed, we want to do the opposite -- fat "out" outputs that contain all the other outputs.
-  propagatedBuildOutputs = false;
-
-  # By default, if the dev output exists it just uses that.
-  # However, because we disabled propagatedBuildOutputs, dev doesn't contain libraries or
-  # anything of the sort. To remedy this, we set outputSpecified to true, and use
-  # outputsToInstall, which tells Nix which outputs to use when the package name is used
-  # unqualified (that is, without an explicit output).
-  outputSpecified = true;
 
   meta = {
     description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
@@ -343,8 +325,5 @@ backendStdenv.mkDerivation (finalAttrs: {
       lists.optionals isBadPlatform finalAttrs.meta.platforms;
     license = licenses.unfree;
     maintainers = teams.cuda.members;
-    # Force the use of the default, fat output by default (even though `dev` exists, which
-    # causes Nix to prefer that output over the others if outputSpecified isn't set).
-    outputsToInstall = [ "out" ];
   };
 })
-- 
cgit 1.4.1


From 82018339bd1a587e63f5a12c8517252f334220e0 Mon Sep 17 00:00:00 2001
From: Someone Serge <sergei.kozlukov@aalto.fi>
Date: Fri, 28 Jun 2024 01:09:23 +0000
Subject: treewide: cuda: use propagatedBuildInputs, lib.getOutput

---
 pkgs/by-name/gp/gpt4all/package.nix                |  9 ++--
 pkgs/by-name/ll/llama-cpp/package.nix              | 10 ++--
 pkgs/by-name/ol/ollama/package.nix                 | 17 +++---
 pkgs/development/cuda-modules/cuda/overrides.nix   | 22 ++++----
 .../cuda-modules/generic-builders/manifest.nix     |  1 -
 .../cuda-modules/nccl-tests/default.nix            |  4 +-
 pkgs/development/cuda-modules/nccl/default.nix     |  2 +-
 pkgs/development/libraries/dlib/default.nix        | 24 +++------
 pkgs/development/libraries/opencv/4.x.nix          | 21 +++-----
 pkgs/development/libraries/openmpi/default.nix     |  2 +-
 .../libraries/science/math/faiss/default.nix       | 27 ++++------
 .../libraries/science/math/magma/generic.nix       | 16 +++---
 .../libraries/science/math/suitesparse/default.nix | 14 +++--
 .../science/math/tiny-cuda-nn/default.nix          | 18 +++----
 pkgs/development/libraries/ucx/default.nix         |  2 +-
 pkgs/development/python-modules/cupy/default.nix   |  3 +-
 pkgs/development/python-modules/jaxlib/bin.nix     | 12 ++---
 pkgs/development/python-modules/jaxlib/default.nix | 62 +++++++++++++---------
 .../python-modules/tensorflow/default.nix          | 51 ++++++++++++------
 pkgs/development/python-modules/torch/default.nix  | 49 +++++++----------
 pkgs/development/python-modules/vllm/default.nix   |  6 +--
 .../python-modules/xformers/default.nix            | 10 ++--
 pkgs/tools/audio/openai-whisper-cpp/default.nix    | 15 ++----
 23 files changed, 182 insertions(+), 215 deletions(-)

diff --git a/pkgs/by-name/gp/gpt4all/package.nix b/pkgs/by-name/gp/gpt4all/package.nix
index fbf652297a25e..60a6e05de211b 100644
--- a/pkgs/by-name/gp/gpt4all/package.nix
+++ b/pkgs/by-name/gp/gpt4all/package.nix
@@ -47,12 +47,9 @@ stdenv.mkDerivation (finalAttrs: {
   ] ++ lib.optionals cudaSupport (
       with cudaPackages;
       [
-        cuda_cccl.dev
-        cuda_cudart.dev
-        cuda_cudart.lib
-        cuda_cudart.static
-        libcublas.dev
-        libcublas.lib
+        cuda_cccl
+        cuda_cudart
+        libcublas
       ]);
 
   cmakeFlags = [
diff --git a/pkgs/by-name/ll/llama-cpp/package.nix b/pkgs/by-name/ll/llama-cpp/package.nix
index 254f8c2a51f6f..016b7fbdef20b 100644
--- a/pkgs/by-name/ll/llama-cpp/package.nix
+++ b/pkgs/by-name/ll/llama-cpp/package.nix
@@ -46,16 +46,12 @@ let
     ++ optionals metalSupport [ MetalKit ];
 
    cudaBuildInputs = with cudaPackages; [
-    cuda_cccl.dev # <nv/target>
+    cuda_cccl # <nv/target>
 
     # A temporary hack for reducing the closure size, remove once cudaPackages
     # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-    cuda_cudart.dev
-    cuda_cudart.lib
-    cuda_cudart.static
-    libcublas.dev
-    libcublas.lib
-    libcublas.static
+    cuda_cudart
+    libcublas
   ];
 
   rocmBuildInputs = with rocmPackages; [
diff --git a/pkgs/by-name/ol/ollama/package.nix b/pkgs/by-name/ol/ollama/package.nix
index 94de36bce94d6..1cc8fdb60c386 100644
--- a/pkgs/by-name/ol/ollama/package.nix
+++ b/pkgs/by-name/ol/ollama/package.nix
@@ -101,12 +101,12 @@ let
   };
 
   cudaToolkit = buildEnv {
-    name = "cuda-toolkit";
-    ignoreCollisions = true; # FIXME: find a cleaner way to do this without ignoring collisions
+    name = "cuda-merged";
     paths = [
-      cudaPackages.cudatoolkit
-      cudaPackages.cuda_cudart
-      cudaPackages.cuda_cudart.static
+      (lib.getBin (cudaPackages.cuda_nvcc.__spliced.buildHost or cudaPackages.cuda_nvcc))
+      (lib.getLib cudaPackages.cuda_cudart)
+      (lib.getOutput "static" cudaPackages.cuda_cudart)
+      (lib.getLib cudaPackages.libcublas)
     ];
   };
 
@@ -140,10 +140,6 @@ in
 goBuild ((lib.optionalAttrs enableRocm {
   ROCM_PATH = rocmPath;
   CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
-}) // (lib.optionalAttrs enableCuda {
-  CUDA_LIB_DIR = "${cudaToolkit}/lib";
-  CUDACXX = "${cudaToolkit}/bin/nvcc";
-  CUDAToolkit_ROOT = cudaToolkit;
 }) // {
   inherit pname version src vendorHash;
 
@@ -151,6 +147,8 @@ goBuild ((lib.optionalAttrs enableRocm {
     cmake
   ] ++ lib.optionals enableRocm [
     rocmPackages.llvm.bintools
+  ] ++ lib.optionals enableCuda [
+    cudaPackages.cuda_nvcc
   ] ++ lib.optionals (enableRocm || enableCuda) [
     makeWrapper
   ] ++ lib.optionals stdenv.isDarwin
@@ -160,6 +158,7 @@ goBuild ((lib.optionalAttrs enableRocm {
     (rocmLibs ++ [ libdrm ])
   ++ lib.optionals enableCuda [
     cudaPackages.cuda_cudart
+    cudaPackages.libcublas
   ] ++ lib.optionals stdenv.isDarwin
     metalFrameworks;
 
diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix
index 22fea42febdcd..00ebb5535c690 100644
--- a/pkgs/development/cuda-modules/cuda/overrides.nix
+++ b/pkgs/development/cuda-modules/cuda/overrides.nix
@@ -44,7 +44,7 @@ filterAndCreateOverrides {
     }:
     prevAttrs: {
       buildInputs = prevAttrs.buildInputs ++ [
-        libcublas.lib
+        libcublas
         numactl
         rdma-core
       ];
@@ -66,17 +66,17 @@ filterAndCreateOverrides {
       buildInputs =
         prevAttrs.buildInputs
         # Always depends on this
-        ++ [ libcublas.lib ]
+        ++ [ libcublas ]
         # Dependency from 12.0 and on
-        ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ]
+        ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink ]
         # Dependency from 12.1 and on
-        ++ lib.lists.optionals (cudaAtLeast "12.1") [ libcusparse.lib ];
+        ++ lib.lists.optionals (cudaAtLeast "12.1") [ libcusparse ];
 
       brokenConditions = prevAttrs.brokenConditions // {
         "libnvjitlink missing (CUDA >= 12.0)" =
-          !(cudaAtLeast "12.0" -> (libnvjitlink != null && libnvjitlink.lib != null));
+          !(cudaAtLeast "12.0" -> (libnvjitlink != null));
         "libcusparse missing (CUDA >= 12.1)" =
-          !(cudaAtLeast "12.1" -> (libcusparse != null && libcusparse.lib != null));
+          !(cudaAtLeast "12.1" -> (libcusparse != null));
       };
     };
 
@@ -90,16 +90,16 @@ filterAndCreateOverrides {
       buildInputs =
         prevAttrs.buildInputs
         # Dependency from 12.0 and on
-        ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink.lib ];
+        ++ lib.lists.optionals (cudaAtLeast "12.0") [ libnvjitlink ];
 
       brokenConditions = prevAttrs.brokenConditions // {
         "libnvjitlink missing (CUDA >= 12.0)" =
-          !(cudaAtLeast "12.0" -> (libnvjitlink != null && libnvjitlink.lib != null));
+          !(cudaAtLeast "12.0" -> (libnvjitlink != null));
       };
     };
 
   # TODO(@connorbaker): cuda_cudart.dev depends on crt/host_config.h, which is from
-  # cuda_nvcc.dev. It would be nice to be able to encode that.
+  # (getDev cuda_nvcc). It would be nice to be able to encode that.
   cuda_cudart =
     { addDriverRunpath, lib }:
     prevAttrs: {
@@ -248,8 +248,8 @@ filterAndCreateOverrides {
     prevAttrs: {
       buildInputs = prevAttrs.buildInputs ++ [
         freeglut
-        libcufft.lib
-        libcurand.lib
+        libcufft
+        libcurand
         libGLU
         libglvnd
         mesa
diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix
index ccf1bb980ca3d..4af16c8921cb2 100644
--- a/pkgs/development/cuda-modules/generic-builders/manifest.nix
+++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix
@@ -307,7 +307,6 @@ backendStdenv.mkDerivation (finalAttrs: {
   # Make the CUDA-patched stdenv available
   passthru.stdenv = backendStdenv;
 
-
   meta = {
     description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}";
     sourceProvenance = [ sourceTypes.binaryNativeCode ];
diff --git a/pkgs/development/cuda-modules/nccl-tests/default.nix b/pkgs/development/cuda-modules/nccl-tests/default.nix
index e1f4eed7fae46..84575234a7691 100644
--- a/pkgs/development/cuda-modules/nccl-tests/default.nix
+++ b/pkgs/development/cuda-modules/nccl-tests/default.nix
@@ -45,11 +45,11 @@ backendStdenv.mkDerivation (finalAttrs: {
     [ nccl ]
     ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [ cudatoolkit ]
     ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [
-      cuda_nvcc.dev # crt/host_config.h
+      cuda_nvcc # crt/host_config.h
       cuda_cudart
     ]
     ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [
-      cuda_cccl.dev # <nv/target>
+      cuda_cccl # <nv/target>
     ]
     ++ lib.optionals mpiSupport [ mpi ];
 
diff --git a/pkgs/development/cuda-modules/nccl/default.nix b/pkgs/development/cuda-modules/nccl/default.nix
index dd767d2781f06..e838bac3abc3c 100644
--- a/pkgs/development/cuda-modules/nccl/default.nix
+++ b/pkgs/development/cuda-modules/nccl/default.nix
@@ -54,7 +54,7 @@ backendStdenv.mkDerivation (finalAttrs: {
   buildInputs =
     lib.optionals (cudaOlder "11.4") [ cudatoolkit ]
     ++ lib.optionals (cudaAtLeast "11.4") [
-      cuda_nvcc.dev # crt/host_config.h
+      cuda_nvcc # crt/host_config.h
       cuda_cudart
     ]
     # NOTE: CUDA versions in Nixpkgs only use a major and minor version. When we do comparisons
diff --git a/pkgs/development/libraries/dlib/default.nix b/pkgs/development/libraries/dlib/default.nix
index e7915ad652144..cf120405a5fa6 100644
--- a/pkgs/development/libraries/dlib/default.nix
+++ b/pkgs/development/libraries/dlib/default.nix
@@ -57,23 +57,13 @@
   ]
   ++ lib.optionals guiSupport [ libX11 ]
   ++ lib.optionals cudaSupport (with cudaPackages; [
-    cuda_cudart.dev
-    cuda_cudart.lib
-    cuda_cudart.static
-    cuda_nvcc.dev
-    libcublas.dev
-    libcublas.lib
-    libcublas.static
-    libcurand.dev
-    libcurand.lib
-    libcurand.static
-    libcusolver.dev
-    libcusolver.lib
-    libcusolver.static
-    cudnn.dev
-    cudnn.lib
-    cudnn.static
-    cuda_cccl.dev
+    cuda_cudart
+    cuda_nvcc
+    libcublas
+    libcurand
+    libcusolver
+    cudnn
+    cuda_cccl
   ]);
 
   passthru = {
diff --git a/pkgs/development/libraries/opencv/4.x.nix b/pkgs/development/libraries/opencv/4.x.nix
index a4961163c9ad3..e2509ca5199a1 100644
--- a/pkgs/development/libraries/opencv/4.x.nix
+++ b/pkgs/development/libraries/opencv/4.x.nix
@@ -372,28 +372,19 @@ effectiveStdenv.mkDerivation {
     doxygen
     graphviz-nox
   ] ++ lib.optionals enableCuda (with cudaPackages; [
-    cuda_cudart.lib
-    cuda_cudart.dev
-    cuda_cccl.dev # <thrust/*>
-    libnpp.dev # npp.h
-    libnpp.lib
-    libnpp.static
+    cuda_cudart
+    cuda_cccl # <thrust/*>
+    libnpp # npp.h
     nvidia-optical-flow-sdk
   ] ++ lib.optionals enableCublas [
     # May start using the default $out instead once
     # https://github.com/NixOS/nixpkgs/issues/271792
     # has been addressed
-    libcublas.static
-    libcublas.lib
-    libcublas.dev # cublas_v2.h
+    libcublas # cublas_v2.h
   ] ++ lib.optionals enableCudnn [
-    cudnn.dev # cudnn.h
-    cudnn.lib
-    cudnn.static
+    cudnn # cudnn.h
   ] ++ lib.optionals enableCufft [
-    libcufft.dev # cufft.h
-    libcufft.lib
-    libcufft.static
+    libcufft # cufft.h
   ]);
 
   propagatedBuildInputs = lib.optionals enablePython [ pythonPackages.numpy ];
diff --git a/pkgs/development/libraries/openmpi/default.nix b/pkgs/development/libraries/openmpi/default.nix
index 8c54c13c9139e..59f2d21d4c482 100644
--- a/pkgs/development/libraries/openmpi/default.nix
+++ b/pkgs/development/libraries/openmpi/default.nix
@@ -63,7 +63,7 @@ stdenv.mkDerivation rec {
     # TODO: add UCX support, which is recommended to use with cuda for the most robust OpenMPI build
     # https://github.com/openucx/ucx
     # https://www.open-mpi.org/faq/?category=buildcuda
-    ++ lib.optionals cudaSupport [ "--with-cuda=${cudaPackages.cuda_cudart}" "--enable-dlopen" ]
+    ++ lib.optionals cudaSupport [ "--with-cuda=${lib.getDev cudaPackages.cuda_cudart}" "--enable-dlopen" ]
     ++ lib.optionals fabricSupport [ "--with-psm2=${lib.getDev libpsm2}" "--with-libfabric=${lib.getDev libfabric}" ]
     ;
 
diff --git a/pkgs/development/libraries/science/math/faiss/default.nix b/pkgs/development/libraries/science/math/faiss/default.nix
index 5af73735fdf60..1dfe814427889 100644
--- a/pkgs/development/libraries/science/math/faiss/default.nix
+++ b/pkgs/development/libraries/science/math/faiss/default.nix
@@ -33,19 +33,15 @@ let
 
   stdenv = if cudaSupport then backendStdenv else inputs.stdenv;
 
-  cudaJoined = symlinkJoin {
-    name = "cuda-packages-unsplit";
-    paths = with cudaPackages; [
-      cuda_cudart # cuda_runtime.h
-      libcublas
-      libcurand
-      cuda_cccl
-    ] ++ lib.optionals (cudaPackages ? cuda_profiler_api) [
-      cuda_profiler_api # cuda_profiler_api.h
-    ] ++ lib.optionals (!(cudaPackages ? cuda_profiler_api)) [
-      cuda_nvprof # cuda_profiler_api.h
-    ];
-  };
+  cudaComponents = with cudaPackages; [
+    cuda_cudart # cuda_runtime.h
+    libcublas
+    libcurand
+    cuda_cccl
+
+    # cuda_profiler_api.h
+    (cudaPackages.cuda_profiler_api or cudaPackages.cuda_nvprof)
+  ];
 in
 stdenv.mkDerivation {
   inherit pname version;
@@ -68,9 +64,7 @@ stdenv.mkDerivation {
     pythonPackages.wheel
   ] ++ lib.optionals stdenv.cc.isClang [
     llvmPackages.openmp
-  ] ++ lib.optionals cudaSupport [
-    cudaJoined
-  ];
+  ] ++ lib.optionals cudaSupport cudaComponents;
 
   propagatedBuildInputs = lib.optionals pythonSupport [
     pythonPackages.numpy
@@ -93,7 +87,6 @@ stdenv.mkDerivation {
     "-DFAISS_OPT_LEVEL=${optLevel}"
   ] ++ lib.optionals cudaSupport [
     "-DCMAKE_CUDA_ARCHITECTURES=${flags.cmakeCudaArchitecturesString}"
-    "-DCUDAToolkit_INCLUDE_DIR=${cudaJoined}/include"
   ];
 
   buildFlags = [
diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix
index a675142f361d4..08dac598f7d91 100644
--- a/pkgs/development/libraries/science/math/magma/generic.nix
+++ b/pkgs/development/libraries/science/math/magma/generic.nix
@@ -134,19 +134,15 @@ stdenv.mkDerivation {
     blas
     python3
   ] ++ lists.optionals cudaSupport (with effectiveCudaPackages; [
-    cuda_cudart.dev # cuda_runtime.h
-    cuda_cudart.lib # cudart
-    cuda_cudart.static # cudart_static
-    libcublas.dev # cublas_v2.h
-    libcublas.lib # cublas
-    libcusparse.dev # cusparse.h
-    libcusparse.lib # cusparse
+    cuda_cudart # cuda_runtime.h
+    libcublas # cublas_v2.h
+    libcusparse # cusparse.h
   ] ++ lists.optionals (cudaOlder "11.8") [
-    cuda_nvprof.dev # <cuda_profiler_api.h>
+    cuda_nvprof # <cuda_profiler_api.h>
   ] ++ lists.optionals (cudaAtLeast "11.8") [
-    cuda_profiler_api.dev # <cuda_profiler_api.h>
+    cuda_profiler_api # <cuda_profiler_api.h>
   ] ++ lists.optionals (cudaAtLeast "12.0") [
-    cuda_cccl.dev # <nv/target>
+    cuda_cccl # <nv/target>
   ]) ++ lists.optionals rocmSupport [
     rocmPackages.clr
     rocmPackages.hipblas
diff --git a/pkgs/development/libraries/science/math/suitesparse/default.nix b/pkgs/development/libraries/science/math/suitesparse/default.nix
index fcfd9b56a1b14..43623e8604b9b 100644
--- a/pkgs/development/libraries/science/math/suitesparse/default.nix
+++ b/pkgs/development/libraries/science/math/suitesparse/default.nix
@@ -36,17 +36,15 @@ stdenv.mkDerivation rec {
   buildInputs = assert (blas.isILP64 == lapack.isILP64); [
     blas lapack
     metis
-    gfortran.cc.lib
+    (lib.getLib gfortran.cc)
     gmp
     mpfr
   ] ++ lib.optionals stdenv.cc.isClang [
     openmp
   ] ++ lib.optionals enableCuda [
-    cudaPackages.cuda_cudart.dev
-    cudaPackages.cuda_cudart.lib
-    cudaPackages.cuda_cccl.dev
-    cudaPackages.libcublas.dev
-    cudaPackages.libcublas.lib
+    cudaPackages.cuda_cudart
+    cudaPackages.cuda_cccl
+    cudaPackages.libcublas
   ];
 
   preConfigure = ''
@@ -63,8 +61,8 @@ stdenv.mkDerivation rec {
     "CFLAGS=-DBLAS64"
   ] ++ lib.optionals enableCuda [
     "CUDA_PATH=${cudaPackages.cuda_nvcc}"
-    "CUDART_LIB=${cudaPackages.cuda_cudart.lib}/lib/libcudart.so"
-    "CUBLAS_LIB=${cudaPackages.libcublas.lib}/lib/libcublas.so"
+    "CUDART_LIB=${lib.getLib cudaPackages.cuda_cudart}/lib/libcudart.so"
+    "CUBLAS_LIB=${lib.getLib cudaPackages.libcublas}/lib/libcublas.so"
   ] ++ lib.optionals stdenv.isDarwin [
     # Unless these are set, the build will attempt to use `Accelerate` on darwin, see:
     # https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/v5.13.0/SuiteSparse_config/SuiteSparse_config.mk#L368
diff --git a/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix b/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix
index e9367d416e325..e20365b215dcc 100644
--- a/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix
+++ b/pkgs/development/libraries/science/math/tiny-cuda-nn/default.nix
@@ -14,15 +14,15 @@
   inherit (cudaPackages) backendStdenv flags;
 
   cuda-common-redist = with cudaPackages; [
-    cuda_cudart.dev # cuda_runtime.h
-    cuda_cudart.lib
-    cuda_cccl.dev # <nv/target>
-    libcublas.dev # cublas_v2.h
-    libcublas.lib
-    libcusolver.dev # cusolverDn.h
-    libcusolver.lib
-    libcusparse.dev # cusparse.h
-    libcusparse.lib
+    (lib.getDev cuda_cudart) # cuda_runtime.h
+    (lib.getLib cuda_cudart)
+    (lib.getDev cuda_cccl) # <nv/target>
+    (lib.getDev libcublas) # cublas_v2.h
+    (lib.getLib libcublas)
+    (lib.getDev libcusolver) # cusolverDn.h
+    (lib.getLib libcusolver)
+    (lib.getDev libcusparse) # cusparse.h
+    (lib.getLib libcusparse)
   ];
 
   cuda-native-redist = symlinkJoin {
diff --git a/pkgs/development/libraries/ucx/default.nix b/pkgs/development/libraries/ucx/default.nix
index 464ea6e374c37..8f134cd594e3c 100644
--- a/pkgs/development/libraries/ucx/default.nix
+++ b/pkgs/development/libraries/ucx/default.nix
@@ -55,7 +55,7 @@ stdenv.mkDerivation rec {
 
   LDFLAGS = lib.optionals enableCuda [
     # Fake libnvidia-ml.so (the real one is deployed impurely)
-    "-L${cudaPackages.cuda_nvml_dev}/lib/stubs"
+    "-L${lib.getLib cudaPackages.cuda_nvml_dev}/lib/stubs"
   ];
 
   configureFlags = [
diff --git a/pkgs/development/python-modules/cupy/default.nix b/pkgs/development/python-modules/cupy/default.nix
index 2388e215d929e..6311a7d099e79 100644
--- a/pkgs/development/python-modules/cupy/default.nix
+++ b/pkgs/development/python-modules/cupy/default.nix
@@ -21,9 +21,8 @@ let
     name = "cudatoolkit-joined-${cudaPackages.cudaVersion}";
     paths = with cudaPackages; [
       cuda_cccl # <nv/target>
-      cuda_cccl.dev
       cuda_cudart
-      cuda_nvcc.dev # <crt/host_defines.h>
+      cuda_nvcc # <crt/host_defines.h>
       cuda_nvprof
       cuda_nvrtc
       cuda_nvtx
diff --git a/pkgs/development/python-modules/jaxlib/bin.nix b/pkgs/development/python-modules/jaxlib/bin.nix
index 5d4943a97ced4..c27a5231d3c5a 100644
--- a/pkgs/development/python-modules/jaxlib/bin.nix
+++ b/pkgs/development/python-modules/jaxlib/bin.nix
@@ -34,12 +34,12 @@ let
   cudaLibPath = lib.makeLibraryPath (
     with cudaPackages;
     [
-      cuda_cudart.lib # libcudart.so
-      cuda_cupti.lib # libcupti.so
-      cudnn.lib # libcudnn.so
-      libcufft.lib # libcufft.so
-      libcusolver.lib # libcusolver.so
-      libcusparse.lib # libcusparse.so
+      (lib.getLib cuda_cudart) # libcudart.so
+      (lib.getLib cuda_cupti) # libcupti.so
+      (lib.getLib cudnn) # libcudnn.so
+      (lib.getLib libcufft) # libcufft.so
+      (lib.getLib libcusolver) # libcusolver.so
+      (lib.getLib libcusparse) # libcusparse.so
     ]
   );
 
diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix
index b77a7de7b3575..02577cac890eb 100644
--- a/pkgs/development/python-modules/jaxlib/default.nix
+++ b/pkgs/development/python-modules/jaxlib/default.nix
@@ -55,7 +55,6 @@ let
   inherit (cudaPackages)
     cudaFlags
     cudaVersion
-    cudnn
     nccl
     ;
 
@@ -80,18 +79,26 @@ let
     broken = effectiveStdenv.isDarwin || nccl.meta.unsupported;
   };
 
+  # Bazel wants a merged cudnn at configuration time
+  cudnnMerged = symlinkJoin {
+    name = "cudnn-merged";
+    paths = with cudaPackages; [
+      (lib.getDev cudnn)
+      (lib.getLib cudnn)
+    ];
+  };
+
   # These are necessary at build time and run time.
   cuda_libs_joined = symlinkJoin {
     name = "cuda-joined";
     paths = with cudaPackages; [
-      cuda_cudart.lib # libcudart.so
-      cuda_cudart.static # libcudart_static.a
-      cuda_cupti.lib # libcupti.so
-      libcublas.lib # libcublas.so
-      libcufft.lib # libcufft.so
-      libcurand.lib # libcurand.so
-      libcusolver.lib # libcusolver.so
-      libcusparse.lib # libcusparse.so
+      (lib.getLib cuda_cudart) # libcudart.so
+      (lib.getLib cuda_cupti) # libcupti.so
+      (lib.getLib libcublas) # libcublas.so
+      (lib.getLib libcufft) # libcufft.so
+      (lib.getLib libcurand) # libcurand.so
+      (lib.getLib libcusolver) # libcusolver.so
+      (lib.getLib libcusparse) # libcusparse.so
     ];
   };
   # These are only necessary at build time.
@@ -101,20 +108,23 @@ let
       cuda_libs_joined
 
       # Binaries
-      cudaPackages.cuda_nvcc.bin # nvcc
+      (lib.getBin cuda_nvcc) # nvcc
+
+      # Archives
+      (lib.getOutput "static" cuda_cudart) # libcudart_static.a
 
       # Headers
-      cuda_cccl.dev # block_load.cuh
-      cuda_cudart.dev # cuda.h
-      cuda_cupti.dev # cupti.h
-      cuda_nvcc.dev # See https://github.com/google/jax/issues/19811
-      cuda_nvml_dev # nvml.h
-      cuda_nvtx.dev # nvToolsExt.h
-      libcublas.dev # cublas_api.h
-      libcufft.dev # cufft.h
-      libcurand.dev # curand.h
-      libcusolver.dev # cusolver_common.h
-      libcusparse.dev # cusparse.h
+      (lib.getDev cuda_cccl) # block_load.cuh
+      (lib.getDev cuda_cudart) # cuda.h
+      (lib.getDev cuda_cupti) # cupti.h
+      (lib.getDev cuda_nvcc) # See https://github.com/google/jax/issues/19811
+      (lib.getDev cuda_nvml_dev) # nvml.h
+      (lib.getDev cuda_nvtx) # nvToolsExt.h
+      (lib.getDev libcublas) # cublas_api.h
+      (lib.getDev libcufft) # cufft.h
+      (lib.getDev libcurand) # curand.h
+      (lib.getDev libcusolver) # cusolver_common.h
+      (lib.getDev libcusparse) # cusparse.h
     ];
   };
 
@@ -308,10 +318,10 @@ let
       + lib.optionalString cudaSupport ''
         build --config=cuda
         build --action_env CUDA_TOOLKIT_PATH="${cuda_build_deps_joined}"
-        build --action_env CUDNN_INSTALL_PATH="${cudnn}"
-        build --action_env TF_CUDA_PATHS="${cuda_build_deps_joined},${cudnn},${nccl}"
+        build --action_env CUDNN_INSTALL_PATH="${cudnnMerged}"
+        build --action_env TF_CUDA_PATHS="${cuda_build_deps_joined},${cudnnMerged},${lib.getDev nccl}"
         build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudaVersion}"
-        build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
+        build --action_env TF_CUDNN_VERSION="${lib.versions.major cudaPackages.cudnn.version}"
         build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.realArches}"
       ''
       +
@@ -431,13 +441,13 @@ buildPythonPackage {
   # for more info.
   postInstall = lib.optionalString cudaSupport ''
     mkdir -p $out/bin
-    ln -s ${cudaPackages.cuda_nvcc.bin}/bin/ptxas $out/bin/ptxas
+    ln -s ${lib.getExe' cudaPackages.cuda_nvcc "ptxas"} $out/bin/ptxas
 
     find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib; do
       patchelf --add-rpath "${
         lib.makeLibraryPath [
           cuda_libs_joined
-          cudnn
+          (lib.getLib cudaPackages.cudnn)
           nccl
         ]
       }" "$lib"
diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix
index d311edc188ad6..5f41420dffbca 100644
--- a/pkgs/development/python-modules/tensorflow/default.nix
+++ b/pkgs/development/python-modules/tensorflow/default.nix
@@ -116,7 +116,13 @@ let
   # cudaPackages.cudnn led to this:
   # https://github.com/tensorflow/tensorflow/issues/60398
   cudnnAttribute = "cudnn_8_6";
-  cudnn = cudaPackages.${cudnnAttribute};
+  cudnnMerged = symlinkJoin {
+    name = "cudnn-merged";
+    paths = [
+      (lib.getDev cudaPackages.${cudnnAttribute})
+      (lib.getLib cudaPackages.${cudnnAttribute})
+    ];
+  };
   gentoo-patches = fetchzip {
     url = "https://dev.gentoo.org/~perfinion/patches/tensorflow-patches-2.12.0.tar.bz2";
     hash = "sha256-SCRX/5/zML7LmKEPJkcM5Tebez9vv/gmE4xhT/jyqWs=";
@@ -130,19 +136,30 @@ let
 
   withTensorboard = (pythonOlder "3.6") || tensorboardSupport;
 
-  # FIXME: migrate to redist cudaPackages
-  cudatoolkit_joined = symlinkJoin {
-    name = "${cudatoolkit.name}-merged";
-    paths =
-      [
-        cudatoolkit.lib
-        cudatoolkit.out
-      ]
-      ++ lib.optionals (lib.versionOlder cudatoolkit.version "11") [
-        # for some reason some of the required libs are in the targets/x86_64-linux
-        # directory; not sure why but this works around it
-        "${cudatoolkit}/targets/${stdenv.system}"
-      ];
+  cudaComponents = with cudaPackages; [
+    (cuda_nvcc.__spliced.buildHost or cuda_nvcc)
+    (cuda_nvprune.__spliced.buildHost or cuda_nvprune)
+    cuda_cccl # block_load.cuh
+    cuda_cudart # cuda.h
+    cuda_cupti # cupti.h
+    cuda_nvcc # See https://github.com/google/jax/issues/19811
+    cuda_nvml_dev # nvml.h
+    cuda_nvtx # nvToolsExt.h
+    libcublas # cublas_api.h
+    libcufft # cufft.h
+    libcurand # curand.h
+    libcusolver # cusolver_common.h
+    libcusparse # cusparse.h
+  ];
+
+  cudatoolkitDevMerged = symlinkJoin {
+    name = "cuda-${cudaPackages.cudaVersion}-dev-merged";
+    paths = lib.concatMap (p: [
+      (lib.getBin p)
+      (lib.getDev p)
+      (lib.getLib p)
+      (lib.getOutput "static" p) # Makes for a very fat closure
+    ]) cudaComponents;
   };
 
   # Tensorflow expects bintools at hard-coded paths, e.g. /usr/bin/ar
@@ -321,7 +338,7 @@ let
       ]
       ++ lib.optionals cudaSupport [
         cudatoolkit
-        cudnn
+        cudnnMerged
       ]
       ++ lib.optionals mklSupport [ mkl ]
       ++ lib.optionals stdenv.isDarwin [
@@ -402,7 +419,7 @@ let
     TF_NEED_MPI = tfFeature cudaSupport;
 
     TF_NEED_CUDA = tfFeature cudaSupport;
-    TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
+    TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkitDevMerged},${cudnnMerged},${lib.getLib nccl}";
     TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
 
     # Needed even when we override stdenv: e.g. for ar
@@ -653,7 +670,7 @@ buildPythonPackage {
     find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib; do
       addOpenGLRunpath "$lib"
 
-      patchelf --set-rpath "${cudatoolkit}/lib:${cudatoolkit.lib}/lib:${cudnn}/lib:${nccl}/lib:$(patchelf --print-rpath "$lib")" "$lib"
+      patchelf --set-rpath "${cudatoolkit}/lib:${cudatoolkit.lib}/lib:${cudnnMerged}/lib:${lib.getLib nccl}/lib:$(patchelf --print-rpath "$lib")" "$lib"
     done
   '';
 
diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix
index 9597a047bdb48..cc08339f6e0ff 100644
--- a/pkgs/development/python-modules/torch/default.nix
+++ b/pkgs/development/python-modules/torch/default.nix
@@ -301,11 +301,11 @@ buildPythonPackage rec {
   preConfigure =
     lib.optionalString cudaSupport ''
       export TORCH_CUDA_ARCH_LIST="${gpuTargetString}"
-      export CUPTI_INCLUDE_DIR=${cudaPackages.cuda_cupti.dev}/include
-      export CUPTI_LIBRARY_DIR=${cudaPackages.cuda_cupti.lib}/lib
+      export CUPTI_INCLUDE_DIR=${lib.getDev cudaPackages.cuda_cupti}/include
+      export CUPTI_LIBRARY_DIR=${lib.getLib cudaPackages.cuda_cupti}/lib
     ''
     + lib.optionalString (cudaSupport && cudaPackages ? cudnn) ''
-      export CUDNN_INCLUDE_DIR=${cudnn.dev}/include
+      export CUDNN_INCLUDE_DIR=${lib.getDev cudnn}/include
       export CUDNN_LIB_DIR=${cudnn.lib}/lib
     ''
     + lib.optionalString rocmSupport ''
@@ -453,42 +453,31 @@ buildPythonPackage rec {
     ++ lib.optionals cudaSupport (
       with cudaPackages;
       [
-        cuda_cccl.dev # <thrust/*>
-        cuda_cudart.dev # cuda_runtime.h and libraries
-        cuda_cudart.lib
-        cuda_cudart.static
-        cuda_cupti.dev # For kineto
-        cuda_cupti.lib # For kineto
-        cuda_nvcc.dev # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
-        cuda_nvml_dev.dev # <nvml.h>
-        cuda_nvrtc.dev
-        cuda_nvrtc.lib
-        cuda_nvtx.dev
-        cuda_nvtx.lib # -llibNVToolsExt
-        libcublas.dev
-        libcublas.lib
-        libcufft.dev
-        libcufft.lib
-        libcurand.dev
-        libcurand.lib
-        libcusolver.dev
-        libcusolver.lib
-        libcusparse.dev
-        libcusparse.lib
+        cuda_cccl # <thrust/*>
+        cuda_cudart # cuda_runtime.h and libraries
+        cuda_cupti # For kineto
+        cuda_nvcc # crt/host_config.h; even though we include this in nativeBuildinputs, it's needed here too
+        cuda_nvml_dev # <nvml.h>
+        cuda_nvrtc
+        cuda_nvtx # -llibNVToolsExt
+        libcublas
+        libcufft
+        libcurand
+        libcusolver
+        libcusparse
       ]
       ++ lists.optionals (cudaPackages ? cudnn) [
-        cudnn.dev
-        cudnn.lib
+        cudnn
       ]
       ++ lists.optionals useSystemNccl [
         # Some platforms do not support NCCL (i.e., Jetson)
-        nccl.dev # Provides nccl.h AND a static copy of NCCL!
+        nccl # Provides nccl.h AND a static copy of NCCL!
       ]
       ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [
-        cuda_nvprof.dev # <cuda_profiler_api.h>
+        cuda_nvprof # <cuda_profiler_api.h>
       ]
       ++ lists.optionals (strings.versionAtLeast cudaVersion "11.8") [
-        cuda_profiler_api.dev # <cuda_profiler_api.h>
+        cuda_profiler_api # <cuda_profiler_api.h>
       ]
     )
     ++ lib.optionals rocmSupport [ rocmPackages.llvm.openmp ]
diff --git a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix
index 7ccab0a231293..2418f97452db8 100644
--- a/pkgs/development/python-modules/vllm/default.nix
+++ b/pkgs/development/python-modules/vllm/default.nix
@@ -100,9 +100,9 @@ buildPythonPackage rec {
       with cudaPackages;
       [
         cuda_cudart # cuda_runtime.h, -lcudart
-        cuda_cccl.dev # <thrust/*>
-        libcusparse.dev # cusparse.h
-        libcublas.dev # cublas_v2.h
+        cuda_cccl # <thrust/*>
+        libcusparse # cusparse.h
+        libcublas # cublas_v2.h
         libcusolver # cusolverDn.h
       ]
     ))
diff --git a/pkgs/development/python-modules/xformers/default.nix b/pkgs/development/python-modules/xformers/default.nix
index 261ff0cb89e8a..9a7b7bbef2b15 100644
--- a/pkgs/development/python-modules/xformers/default.nix
+++ b/pkgs/development/python-modules/xformers/default.nix
@@ -66,11 +66,11 @@ buildPythonPackage {
     [
       # flash-attn build
       cuda_cudart # cuda_runtime_api.h
-      libcusparse.dev # cusparse.h
-      cuda_cccl.dev # nv/target
-      libcublas.dev # cublas_v2.h
-      libcusolver.dev # cusolverDn.h
-      libcurand.dev # curand_kernel.h
+      libcusparse # cusparse.h
+      cuda_cccl # nv/target
+      libcublas # cublas_v2.h
+      libcusolver # cusolverDn.h
+      libcurand # curand_kernel.h
     ]
   );
 
diff --git a/pkgs/tools/audio/openai-whisper-cpp/default.nix b/pkgs/tools/audio/openai-whisper-cpp/default.nix
index 218872ed1e313..c74919dbcb0ed 100644
--- a/pkgs/tools/audio/openai-whisper-cpp/default.nix
+++ b/pkgs/tools/audio/openai-whisper-cpp/default.nix
@@ -57,21 +57,14 @@ effectiveStdenv.mkDerivation (finalAttrs: {
       CoreVideo
       MetalKit
     ] ++ lib.optionals cudaSupport ( with cudaPackages; [
-
-      # A temporary hack for reducing the closure size, remove once cudaPackages
-      # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-      cuda_cccl.dev # provides nv/target
-      cuda_cudart.dev
-      cuda_cudart.lib
-      cuda_cudart.static
-      libcublas.dev
-      libcublas.lib
-      libcublas.static
+      cuda_cccl # provides nv/target
+      cuda_cudart
+      libcublas
     ]);
 
   postPatch = let
     cudaOldStr = "-lcuda ";
-    cudaNewStr = "-lcuda -L${cudaPackages.cuda_cudart.lib}/lib/stubs ";
+    cudaNewStr = "-lcuda -L${lib.getLib cudaPackages.cuda_cudart}/lib/stubs ";
   in lib.optionalString cudaSupport ''
     substituteInPlace Makefile \
       --replace '${cudaOldStr}' '${cudaNewStr}'
-- 
cgit 1.4.1