From 8e800cedaf24f5ad9717463b809b0beef7677000 Mon Sep 17 00:00:00 2001
From: Connor Baker
Date: Tue, 7 Nov 2023 14:35:11 +0000
Subject: cudaPackages: move derivations to cuda-modules & support aarch64

cudaPackages.cuda_compat: ignore missing libs provided at runtime

cudaPackages.gpus: Jetson should never build by default

cudaPackages.flags: don't build Jetson capabilities by default

cudaPackages: re-introduce filter for pre-existing CUDA redist packages in overrides

cudaPackages: only recurseIntoAttrs for the latest of each major version

cudaPackages.nvccCompatibilities: use GCC 10 through CUDA 11.5 to avoid a GLIBC incompatibility

cudaPackages.cutensor: acquire libcublas through cudatoolkit prior to 11.4

cudaPackages.cuda_compat: mark as broken on aarch64-linux if not targeting Jetson

cudaPackages.cutensor_1_4: fix build

cudaPackages: adjust use of autoPatchelfIgnoreMissingDeps

cudaPackages.cuda_nvprof: remove unnecessary override to add addOpenGLRunpath

cudaPackages: use getExe' to avoid patchelf warning about missing meta.mainProgram

cudaPackages: fix evaluation with Nix 2.3

cudaPackages: fix platform detection for Jetson/non-Jetson aarch64-linux

python3Packages.tensorrt: mark as broken if required packages are missing

Note: evaluating the name of the derivation will fail if tensorrt is not
present, which is why we wrap the value in `lib.optionalString`.

cudaPackages.flags.getNixSystem: add guard based on jetsonTargets

cudaPackages.cudnn: use explicit path to patchelf

cudaPackages.tensorrt: use explicit path to patchelf
---
 .../cudatoolkit/auto-add-opengl-runpath-hook.sh | 28 --
 pkgs/development/compilers/cudatoolkit/common.nix | 367 --------
 .../hooks/mark-for-cudatoolkit-root-hook.sh | 14 -
 .../compilers/cudatoolkit/hooks/setup-cuda-hook.sh | 139 --------
 .../redist/build-cuda-redist-package.nix | 178 ----------
 .../compilers/cudatoolkit/redist/extension.nix | 139 --------
 .../compilers/cudatoolkit/redist/overrides.nix | 119 -------
 .../compilers/cudatoolkit/saxpy/CMakeLists.txt | 12 -
 .../compilers/cudatoolkit/saxpy/default.nix | 52 ---
 .../compilers/cudatoolkit/saxpy/saxpy.cu | 68 ----
 pkgs/development/compilers/cudatoolkit/stdenv.nix | 33 --
 pkgs/development/cuda-modules/backend-stdenv.nix | 39 +++
 pkgs/development/cuda-modules/cuda/extension.nix | 101 ++++++
 pkgs/development/cuda-modules/cuda/overrides.nix | 129 +++++++
 .../cuda-modules/cudatoolkit/default.nix | 374 +++++++++++++++++++++
 pkgs/development/cuda-modules/cudnn/fixup.nix | 69 ++++
 pkgs/development/cuda-modules/cudnn/shims.nix | 13 +
 .../cuda-modules/cutensor/extension.nix | 164 +++++++++
 pkgs/development/cuda-modules/flags.nix | 49 +--
 .../cuda-modules/generic-builders/manifest.nix | 249 ++++++++++++++
 .../cuda-modules/generic-builders/multiplex.nix | 131 ++++++++
 pkgs/development/cuda-modules/gpus.nix | 2 +-
 .../cuda-modules/modules/cuda/default.nix | 1 +
 .../cuda-modules/modules/cudnn/default.nix | 12 +
 .../cuda-modules/modules/cutensor/default.nix | 1 +
 pkgs/development/cuda-modules/modules/default.nix | 10 +
 .../cuda-modules/modules/generic/default.nix | 7 +
 .../modules/generic/manifests/default.nix | 7 +
 .../modules/generic/manifests/feature/manifest.nix | 10 +
 .../modules/generic/manifests/feature/outputs.nix | 60 ++++
 .../modules/generic/manifests/feature/package.nix | 10 +
 .../modules/generic/manifests/feature/release.nix | 10 +
 .../generic/manifests/redistrib/manifest.nix | 33 ++
 .../generic/manifests/redistrib/package.nix | 32 ++
 .../generic/manifests/redistrib/release.nix | 36 ++ 
.../modules/generic/releases/default.nix | 45 +++ .../cuda-modules/modules/generic/types/default.nix | 39 +++ .../cuda-modules/modules/tensorrt/default.nix | 16 + .../cuda-modules/nccl-tests/default.nix | 84 +++++ pkgs/development/cuda-modules/nccl/default.nix | 112 ++++++ .../cuda-modules/nvcc-compatibilities.nix | 12 +- pkgs/development/cuda-modules/saxpy/CMakeLists.txt | 12 + pkgs/development/cuda-modules/saxpy/default.nix | 56 +++ pkgs/development/cuda-modules/saxpy/saxpy.cu | 68 ++++ .../setup-hooks/auto-add-opengl-runpath-hook.sh | 28 ++ .../cuda-modules/setup-hooks/extension.nix | 47 +++ .../setup-hooks/mark-for-cudatoolkit-root-hook.sh | 14 + .../cuda-modules/setup-hooks/setup-cuda-hook.sh | 139 ++++++++ pkgs/development/cuda-modules/tensorrt/fixup.nix | 113 +++++++ .../development/cuda-modules/tensorrt/releases.nix | 5 +- pkgs/development/cuda-modules/tensorrt/shims.nix | 16 + .../libraries/science/math/cudnn/extension.nix | 66 ---- .../libraries/science/math/cudnn/generic.nix | 170 ---------- .../libraries/science/math/cutensor/generic.nix | 88 ----- .../libraries/science/math/nccl/default.nix | 113 ------- .../libraries/science/math/nccl/tests.nix | 84 ----- .../libraries/science/math/tensorrt/generic.nix | 95 ------ .../python-modules/tensorrt/default.nix | 5 +- pkgs/test/cuda/cuda-library-samples/extension.nix | 17 +- pkgs/test/cuda/cuda-library-samples/generic.nix | 74 ++-- pkgs/test/cuda/cuda-samples/extension.nix | 49 ++- pkgs/test/cuda/cuda-samples/generic.nix | 125 +++---- pkgs/test/cuda/default.nix | 12 +- pkgs/top-level/all-packages.nix | 9 +- pkgs/top-level/cuda-packages.nix | 202 ++++++----- 65 files changed, 2616 insertions(+), 1997 deletions(-) delete mode 100644 pkgs/development/compilers/cudatoolkit/auto-add-opengl-runpath-hook.sh delete mode 100644 pkgs/development/compilers/cudatoolkit/common.nix delete mode 100644 pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh delete mode 100644 pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh delete mode 100644 pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix delete mode 100644 pkgs/development/compilers/cudatoolkit/redist/extension.nix delete mode 100644 pkgs/development/compilers/cudatoolkit/redist/overrides.nix delete mode 100644 pkgs/development/compilers/cudatoolkit/saxpy/CMakeLists.txt delete mode 100644 pkgs/development/compilers/cudatoolkit/saxpy/default.nix delete mode 100644 pkgs/development/compilers/cudatoolkit/saxpy/saxpy.cu delete mode 100644 pkgs/development/compilers/cudatoolkit/stdenv.nix create mode 100644 pkgs/development/cuda-modules/backend-stdenv.nix create mode 100644 pkgs/development/cuda-modules/cuda/extension.nix create mode 100644 pkgs/development/cuda-modules/cuda/overrides.nix create mode 100644 pkgs/development/cuda-modules/cudatoolkit/default.nix create mode 100644 pkgs/development/cuda-modules/cudnn/fixup.nix create mode 100644 pkgs/development/cuda-modules/cudnn/shims.nix create mode 100644 pkgs/development/cuda-modules/cutensor/extension.nix create mode 100644 pkgs/development/cuda-modules/generic-builders/manifest.nix create mode 100644 pkgs/development/cuda-modules/generic-builders/multiplex.nix create mode 100644 pkgs/development/cuda-modules/modules/cuda/default.nix create mode 100644 pkgs/development/cuda-modules/modules/cudnn/default.nix create mode 100644 pkgs/development/cuda-modules/modules/cutensor/default.nix create mode 100644 pkgs/development/cuda-modules/modules/default.nix create mode 100644 
pkgs/development/cuda-modules/modules/generic/default.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/manifests/default.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/manifests/feature/manifest.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/manifests/feature/outputs.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/manifests/feature/package.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/manifests/feature/release.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/manifests/redistrib/manifest.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/manifests/redistrib/package.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/manifests/redistrib/release.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/releases/default.nix create mode 100644 pkgs/development/cuda-modules/modules/generic/types/default.nix create mode 100644 pkgs/development/cuda-modules/modules/tensorrt/default.nix create mode 100644 pkgs/development/cuda-modules/nccl-tests/default.nix create mode 100644 pkgs/development/cuda-modules/nccl/default.nix create mode 100644 pkgs/development/cuda-modules/saxpy/CMakeLists.txt create mode 100644 pkgs/development/cuda-modules/saxpy/default.nix create mode 100644 pkgs/development/cuda-modules/saxpy/saxpy.cu create mode 100644 pkgs/development/cuda-modules/setup-hooks/auto-add-opengl-runpath-hook.sh create mode 100644 pkgs/development/cuda-modules/setup-hooks/extension.nix create mode 100644 pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh create mode 100644 pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh create mode 100644 pkgs/development/cuda-modules/tensorrt/fixup.nix create mode 100644 pkgs/development/cuda-modules/tensorrt/shims.nix delete mode 100644 pkgs/development/libraries/science/math/cudnn/extension.nix delete mode 100644 pkgs/development/libraries/science/math/cudnn/generic.nix delete mode 100644 pkgs/development/libraries/science/math/cutensor/generic.nix delete mode 100644 pkgs/development/libraries/science/math/nccl/default.nix delete mode 100644 pkgs/development/libraries/science/math/nccl/tests.nix delete mode 100644 pkgs/development/libraries/science/math/tensorrt/generic.nix diff --git a/pkgs/development/compilers/cudatoolkit/auto-add-opengl-runpath-hook.sh b/pkgs/development/compilers/cudatoolkit/auto-add-opengl-runpath-hook.sh deleted file mode 100644 index f50a5f6c25c66..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/auto-add-opengl-runpath-hook.sh +++ /dev/null @@ -1,28 +0,0 @@ -# shellcheck shell=bash -# Run addOpenGLRunpath on all dynamically linked, ELF files -echo "Sourcing auto-add-opengl-runpath-hook" - -elfHasDynamicSection() { - patchelf --print-rpath "$1" >& /dev/null -} - -autoAddOpenGLRunpathPhase() ( - local outputPaths - mapfile -t outputPaths < <(for o in $(getAllOutputNames); do echo "${!o}"; done) - find "${outputPaths[@]}" -type f -executable -print0 | while IFS= read -rd "" f; do - if isELF "$f"; then - # patchelf returns an error on statically linked ELF files - if elfHasDynamicSection "$f" ; then - echo "autoAddOpenGLRunpathHook: patching $f" - addOpenGLRunpath "$f" - elif (( "${NIX_DEBUG:-0}" >= 1 )) ; then - echo "autoAddOpenGLRunpathHook: skipping a statically-linked ELF file $f" - fi - fi - done -) - -if [ -z "${dontUseAutoAddOpenGLRunpath-}" ]; then - echo "Using autoAddOpenGLRunpathPhase" - 
postFixupHooks+=(autoAddOpenGLRunpathPhase) -fi diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix deleted file mode 100644 index 681549fa62dbe..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/common.nix +++ /dev/null @@ -1,367 +0,0 @@ -args@ -{ version -, sha256 -, url ? "" -, name ? "" -, developerProgram ? false -, runPatches ? [] -, autoPatchelfHook -, autoAddOpenGLRunpathHook -, addOpenGLRunpath -, alsa-lib -, curlMinimal -, expat -, fetchurl -, fontconfig -, freetype -, gdk-pixbuf -, glib -, glibc -, gst_all_1 -, gtk2 -, lib -, libxkbcommon -, libkrb5 -, krb5 -, makeWrapper -, markForCudatoolkitRootHook -, ncurses5 -, numactl -, nss -, perl -, python3 # FIXME: CUDAToolkit 10 may still need python27 -, pulseaudio -, requireFile -, setupCudaHook -, stdenv -, backendStdenv # E.g. gcc11Stdenv, set in extension.nix -, unixODBC -, wayland -, xorg -, zlib -, freeglut -, libGLU -, libsForQt5 -, libtiff -, qt6Packages -, qt6 -, rdma-core -, ucx -, rsync -}: - -backendStdenv.mkDerivation rec { - pname = "cudatoolkit"; - inherit version runPatches; - - dontPatchELF = true; - dontStrip = true; - - src = - if developerProgram then - requireFile { - message = '' - This nix expression requires that ${args.name} is already part of the store. - Register yourself to NVIDIA Accelerated Computing Developer Program, retrieve the CUDA toolkit - at https://developer.nvidia.com/cuda-toolkit, and run the following command in the download directory: - nix-prefetch-url file://\$PWD/${args.name} - ''; - inherit (args) name sha256; - } - else - fetchurl { - inherit (args) url sha256; - }; - - outputs = [ "out" "lib" "doc" ]; - - nativeBuildInputs = [ - perl - makeWrapper - rsync - addOpenGLRunpath - autoPatchelfHook - autoAddOpenGLRunpathHook - markForCudatoolkitRootHook - ] ++ lib.optionals (lib.versionOlder version "11") [ - libsForQt5.wrapQtAppsHook - ] ++ lib.optionals (lib.versionAtLeast version "11.8") [ - qt6Packages.wrapQtAppsHook - ]; - depsTargetTargetPropagated = [ - setupCudaHook - ]; - buildInputs = lib.optionals (lib.versionOlder version "11") [ - libsForQt5.qt5.qtwebengine - freeglut - libGLU - ] ++ [ - # To get $GDK_PIXBUF_MODULE_FILE via setup-hook - gdk-pixbuf - - # For autoPatchelf - ncurses5 - expat - python3 - zlib - glibc - xorg.libX11 - xorg.libXext - xorg.libXrender - xorg.libXt - xorg.libXtst - xorg.libXi - xorg.libXext - xorg.libXdamage - xorg.libxcb - xorg.xcbutilimage - xorg.xcbutilrenderutil - xorg.xcbutilwm - xorg.xcbutilkeysyms - pulseaudio - libxkbcommon - libkrb5 - krb5 - gtk2 - glib - fontconfig - freetype - numactl - nss - unixODBC - alsa-lib - wayland - ] ++ lib.optionals (lib.versionAtLeast version "11.8") [ - (lib.getLib libtiff) - qt6Packages.qtwayland - rdma-core - (ucx.override { enableCuda = false; }) # Avoid infinite recursion - xorg.libxshmfence - xorg.libxkbfile - ] ++ (lib.optionals (lib.versionAtLeast version "12") (map lib.getLib ([ - # Used by `/target-linux-x64/CollectX/clx` and `/target-linux-x64/CollectX/libclx_api.so` for: - # - `libcurl.so.4` - curlMinimal - - # Used by `/host-linux-x64/Scripts/WebRTCContainer/setup/neko/server/bin/neko` - gst_all_1.gstreamer - gst_all_1.gst-plugins-base - ]) ++ (with qt6; [ - qtmultimedia - qttools - qtpositioning - qtscxml - qtsvg - qtwebchannel - qtwebengine - ]))); - - # Prepended to runpaths by autoPatchelf. 
- # The order inherited from older rpath preFixup code - runtimeDependencies = [ - (placeholder "lib") - (placeholder "out") - "${placeholder "out"}/nvvm" - # NOTE: use the same libstdc++ as the rest of nixpkgs, not from backendStdenv - "${lib.getLib stdenv.cc.cc}/lib64" - "${placeholder "out"}/jre/lib/amd64/jli" - "${placeholder "out"}/lib64" - "${placeholder "out"}/nvvm/lib64" - ]; - - autoPatchelfIgnoreMissingDeps = [ - # This is the hardware-dependent userspace driver that comes from - # nvidia_x11 package. It must be deployed at runtime in - # /run/opengl-driver/lib or pointed at by LD_LIBRARY_PATH variable, rather - # than pinned in runpath - "libcuda.so.1" - - # The krb5 expression ships libcom_err.so.3 but cudatoolkit asks for the - # older - # This dependency is asked for by target-linux-x64/CollectX/RedHat/x86_64/libssl.so.10 - # - do we even want to use nvidia-shipped libssl? - "libcom_err.so.2" - ]; - - preFixup = if lib.versionOlder version "11" then '' - patchelf $out/targets/*/lib/libnvrtc.so --add-needed libnvrtc-builtins.so - '' else '' - patchelf $out/lib64/libnvrtc.so --add-needed libnvrtc-builtins.so - ''; - - unpackPhase = '' - sh $src --keep --noexec - - ${lib.optionalString (lib.versionOlder version "10.1") '' - cd pkg/run_files - sh cuda-linux*.run --keep --noexec - sh cuda-samples*.run --keep --noexec - mv pkg ../../$(basename $src) - cd ../.. - rm -rf pkg - - for patch in $runPatches; do - sh $patch --keep --noexec - mv pkg $(basename $patch) - done - ''} - ''; - - installPhase = '' - runHook preInstall - mkdir $out - ${lib.optionalString (lib.versionOlder version "10.1") '' - cd $(basename $src) - export PERL5LIB=. - perl ./install-linux.pl --prefix="$out" - cd .. - for patch in $runPatches; do - cd $(basename $patch) - perl ./install_patch.pl --silent --accept-eula --installdir="$out" - cd .. - done - ''} - ${lib.optionalString (lib.versionAtLeast version "10.1" && lib.versionOlder version "11") '' - cd pkg/builds/cuda-toolkit - mv * $out/ - ''} - ${lib.optionalString (lib.versionAtLeast version "11") '' - mkdir -p $out/bin $out/lib64 $out/include $doc - for dir in pkg/builds/* pkg/builds/cuda_nvcc/nvvm pkg/builds/cuda_cupti/extras/CUPTI; do - if [ -d $dir/bin ]; then - mv $dir/bin/* $out/bin - fi - if [ -d $dir/doc ]; then - (cd $dir/doc && find . -type d -exec mkdir -p $doc/\{} \;) - (cd $dir/doc && find . \( -type f -o -type l \) -exec mv \{} $doc/\{} \;) - fi - if [ -L $dir/include ] || [ -d $dir/include ]; then - (cd $dir/include && find . -type d -exec mkdir -p $out/include/\{} \;) - (cd $dir/include && find . \( -type f -o -type l \) -exec mv \{} $out/include/\{} \;) - fi - if [ -L $dir/lib64 ] || [ -d $dir/lib64 ]; then - (cd $dir/lib64 && find . -type d -exec mkdir -p $out/lib64/\{} \;) - (cd $dir/lib64 && find . \( -type f -o -type l \) -exec mv \{} $out/lib64/\{} \;) - fi - done - mv pkg/builds/cuda_nvcc/nvvm $out/nvvm - - mv pkg/builds/cuda_sanitizer_api $out/cuda_sanitizer_api - ln -s $out/cuda_sanitizer_api/compute-sanitizer/compute-sanitizer $out/bin/compute-sanitizer - - mv pkg/builds/nsight_systems/target-linux-x64 $out/target-linux-x64 - mv pkg/builds/nsight_systems/host-linux-x64 $out/host-linux-x64 - rm $out/host-linux-x64/libstdc++.so* - ''} - ${lib.optionalString (lib.versionAtLeast version "11.8") - # error: auto-patchelf could not satisfy dependency libtiff.so.5 wanted by /nix/store/.......-cudatoolkit-12.0.1/host-linux-x64/Plugins/imageformats/libqtiff.so - # we only ship libtiff.so.6, so let's use qt plugins built by Nix. 
- # TODO: don't copy, come up with a symlink-based "merge" - '' - rsync ${lib.getLib qt6Packages.qtimageformats}/lib/qt-6/plugins/ $out/host-linux-x64/Plugins/ -aP - ''} - - rm -f $out/tools/CUDA_Occupancy_Calculator.xls # FIXME: why? - - ${lib.optionalString (lib.versionOlder version "10.1") '' - # let's remove the 32-bit libraries, they confuse the lib64->lib mover - rm -rf $out/lib - ''} - - ${lib.optionalString (lib.versionAtLeast version "12.0") '' - rm $out/host-linux-x64/libQt6* - ''} - - # Remove some cruft. - ${lib.optionalString ((lib.versionAtLeast version "7.0") && (lib.versionOlder version "10.1")) - "rm $out/bin/uninstall*"} - - # Fixup path to samples (needed for cuda 6.5 or else nsight will not find them) - if [ -d "$out"/cuda-samples ]; then - mv "$out"/cuda-samples "$out"/samples - fi - - # Change the #error on GCC > 4.9 to a #warning. - sed -i $out/include/host_config.h -e 's/#error\(.*unsupported GNU version\)/#warning\1/' - - # Fix builds with newer glibc version - sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h" - '' + - # Point NVCC at a compatible compiler - # CUDA_TOOLKIT_ROOT_DIR is legacy, - # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables - '' - mkdir -p $out/nix-support - cat <> $out/nix-support/setup-hook - cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out' - EOF - - # Move some libraries to the lib output so that programs that - # depend on them don't pull in this entire monstrosity. - mkdir -p $lib/lib - mv -v $out/lib64/libcudart* $lib/lib/ - - # Remove OpenCL libraries as they are provided by ocl-icd and driver. - rm -f $out/lib64/libOpenCL* - ${lib.optionalString (lib.versionAtLeast version "10.1" && (lib.versionOlder version "11")) '' - mv $out/lib64 $out/lib - mv $out/extras/CUPTI/lib64/libcupti* $out/lib - ''} - - # nvprof do not find any program to profile if LD_LIBRARY_PATH is not set - wrapProgram $out/bin/nvprof \ - --prefix LD_LIBRARY_PATH : $out/lib - '' + lib.optionalString (lib.versionOlder version "8.0") '' - # Hack to fix building against recent Glibc/GCC. - echo "NIX_CFLAGS_COMPILE+=' -D_FORCE_INLINES'" >> $out/nix-support/setup-hook - '' - # 11.8 includes a broken symlink, include/include, pointing to targets/x86_64-linux/include - + lib.optionalString (lib.versions.majorMinor version == "11.8") '' - rm $out/include/include - '' + '' - runHook postInstall - ''; - - postInstall = '' - for b in nvvp ${lib.optionalString (lib.versionOlder version "11") "nsight"}; do - wrapProgram "$out/bin/$b" \ - --set GDK_PIXBUF_MODULE_FILE "$GDK_PIXBUF_MODULE_FILE" - done - ''; - - - # cuda-gdb doesn't run correctly when not using sandboxing, so - # temporarily disabling the install check. This should be set to true - # when we figure out how to get `cuda-gdb --version` to run correctly - # when not using sandboxing. 
- doInstallCheck = false; - postInstallCheck = let - in '' - # Smoke test binaries - pushd $out/bin - for f in *; do - case $f in - crt) continue;; - nvcc.profile) continue;; - nsight_ee_plugins_manage.sh) continue;; - uninstall_cuda_toolkit_6.5.pl) continue;; - computeprof|nvvp|nsight) continue;; # GUIs don't feature "--version" - *) echo "Executing '$f --version':"; ./$f --version;; - esac - done - popd - ''; - passthru = { - inherit (backendStdenv) cc; - majorMinorVersion = lib.versions.majorMinor version; - majorVersion = lib.versions.majorMinor version; - }; - - meta = with lib; { - description = "A compiler for NVIDIA GPUs, math libraries, and tools"; - homepage = "https://developer.nvidia.com/cuda-toolkit"; - platforms = [ "x86_64-linux" ]; - license = licenses.nvidiaCuda; - maintainers = teams.cuda.members; - }; -} diff --git a/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh deleted file mode 100644 index ba04c2e0806af..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/hooks/mark-for-cudatoolkit-root-hook.sh +++ /dev/null @@ -1,14 +0,0 @@ -# shellcheck shell=bash - -# Should we mimick cc-wrapper's "hygiene"? -[[ -z ${strictDeps-} ]] || (( "$hostOffset" < 0 )) || return 0 - -echo "Sourcing mark-for-cudatoolkit-root-hook" >&2 - -markForCUDAToolkit_ROOT() { - mkdir -p "${prefix}/nix-support" - [[ -f "${prefix}/nix-support/include-in-cudatoolkit-root" ]] && return - echo "$pname-$output" > "${prefix}/nix-support/include-in-cudatoolkit-root" -} - -fixupOutputHooks+=(markForCUDAToolkit_ROOT) diff --git a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh b/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh deleted file mode 100644 index 7b7b3bdde80e3..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/hooks/setup-cuda-hook.sh +++ /dev/null @@ -1,139 +0,0 @@ -# shellcheck shell=bash - -# Only run the hook from nativeBuildInputs -(( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0 - -guard=Sourcing -reason= - -[[ -n ${cudaSetupHookOnce-} ]] && guard=Skipping && reason=" because the hook has been propagated more than once" - -if (( "${NIX_DEBUG:-0}" >= 1 )) ; then - echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setupCudaHook$reason" >&2 -else - echo "$guard setup-cuda-hook$reason" >&2 -fi - -[[ "$guard" = Sourcing ]] || return 0 - -declare -g cudaSetupHookOnce=1 -declare -Ag cudaHostPathsSeen=() -declare -Ag cudaOutputToPath=() - -extendcudaHostPathsSeen() { - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "extendcudaHostPathsSeen $1" >&2 - - local markerPath="$1/nix-support/include-in-cudatoolkit-root" - [[ ! -f "${markerPath}" ]] && return - [[ -v cudaHostPathsSeen[$1] ]] && return - - cudaHostPathsSeen["$1"]=1 - - # E.g. 
cuda_cudart-lib - local cudaOutputName - read -r cudaOutputName < "$markerPath" - - [[ -z "$cudaOutputName" ]] && return - - local oldPath="${cudaOutputToPath[$cudaOutputName]-}" - [[ -n "$oldPath" ]] && echo "extendcudaHostPathsSeen: warning: overwriting $cudaOutputName from $oldPath to $1" >&2 - cudaOutputToPath["$cudaOutputName"]="$1" -} -addEnvHooks "$targetOffset" extendcudaHostPathsSeen - -setupCUDAToolkit_ROOT() { - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "setupCUDAToolkit_ROOT: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 - - for path in "${!cudaHostPathsSeen[@]}" ; do - addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path" - if [[ -d "$path/include" ]] ; then - addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include" - fi - done - - export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT" -} -preConfigureHooks+=(setupCUDAToolkit_ROOT) - -setupCUDAToolkitCompilers() { - echo Executing setupCUDAToolkitCompilers >&2 - - if [[ -n "${dontSetupCUDAToolkitCompilers-}" ]] ; then - return - fi - - # Point NVCC at a compatible compiler - - # For CMake-based projects: - # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables - # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html - # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html - - export cmakeFlags+=" -DCUDA_HOST_COMPILER=@ccFullPath@" - export cmakeFlags+=" -DCMAKE_CUDA_HOST_COMPILER=@ccFullPath@" - - # For non-CMake projects: - # We prepend --compiler-bindir to nvcc flags. - # Downstream packages can override these, because NVCC - # uses the last --compiler-bindir it gets on the command line. - # FIXME: this results in "incompatible redefinition" warnings. - # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin - if [ -z "${CUDAHOSTCXX-}" ]; then - export CUDAHOSTCXX="@ccFullPath@"; - fi - - export NVCC_PREPEND_FLAGS+=" --compiler-bindir=@ccRoot@/bin" - - # NOTE: We set -Xfatbin=-compress-all, which reduces the size of the compiled - # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as - # the default set of CUDA capabilities we build can regularly cause this to occur (for - # example, with Magma). - # - # @SomeoneSerge: original comment was made by @ConnorBaker in .../cudatoolkit/common.nix - if [[ -z "${dontCompressFatbin-}" ]]; then - export NVCC_PREPEND_FLAGS+=" -Xfatbin=-compress-all" - fi - - # CMake's enable_language(CUDA) runs a compiler test and it doesn't account for - # CUDAToolkit_ROOT. We have to help it locate libcudart - if [[ -z "${nvccDontPrependCudartFlags-}" ]] ; then - if [[ ! -v cudaOutputToPath["cuda_cudart-out"] ]] ; then - echo "setupCUDAToolkitCompilers: missing cudaPackages.cuda_cudart. This may become an an error in the future" >&2 - # exit 1 - fi - for pkg in "${!cudaOutputToPath[@]}" ; do - [[ ! 
"$pkg" = cuda_cudart* ]] && continue - - local path="${cudaOutputToPath[$pkg]}" - if [[ -d "$path/include" ]] ; then - export NVCC_PREPEND_FLAGS+=" -I$path/include" - fi - if [[ -d "$path/lib" ]] ; then - export NVCC_PREPEND_FLAGS+=" -L$path/lib" - fi - done - fi -} -preConfigureHooks+=(setupCUDAToolkitCompilers) - -propagateCudaLibraries() { - (( "${NIX_DEBUG:-0}" >= 1 )) && echo "propagateCudaLibraries: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2 - - [[ -z "${cudaPropagateToOutput-}" ]] && return - - mkdir -p "${!cudaPropagateToOutput}/nix-support" - # One'd expect this should be propagated-bulid-build-deps, but that doesn't seem to work - echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs" - - local propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" ) - for output in $(getAllOutputNames) ; do - if [[ ! "$output" = "$cudaPropagateToOutput" ]] ; then - propagatedBuildInputs+=( "${!output}" ) - fi - break - done - - # One'd expect this should be propagated-host-host-deps, but that doesn't seem to work - printWords "${propagatedBuildInputs[@]}" >> "${!cudaPropagateToOutput}/nix-support/propagated-build-inputs" -} -postFixupHooks+=(propagateCudaLibraries) diff --git a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix deleted file mode 100644 index 1e4a3ab178559..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix +++ /dev/null @@ -1,178 +0,0 @@ -# Type Aliases -# -# See ./extension.nix: -# - ReleaseAttrs -# - ReleaseFeaturesAttrs -# -# General callPackage-supplied arguments -{ lib -, stdenv -, backendStdenv -, fetchurl -, autoPatchelfHook -, autoAddOpenGLRunpathHook -, markForCudatoolkitRootHook -, lndir -, symlinkJoin -}: -# Function arguments -{ - # Short package name (e.g., "cuda_cccl") - # pname : String - pname -, # Long package name (e.g., "CXX Core Compute Libraries") - # description : String - description -, # platforms : List System - platforms -, # version : Version - version -, # releaseAttrs : ReleaseAttrs - releaseAttrs -, # releaseFeaturesAttrs : ReleaseFeaturesAttrs - releaseFeaturesAttrs -, -}: -let - # Useful imports - inherit (lib.lists) optionals; - inherit (lib.meta) getExe; - inherit (lib.strings) optionalString; -in -backendStdenv.mkDerivation (finalAttrs: { - # NOTE: Even though there's no actual buildPhase going on here, the derivations of the - # redistributables are sensitive to the compiler flags provided to stdenv. The patchelf package - # is sensitive to the compiler flags provided to stdenv, and we depend on it. As such, we are - # also sensitive to the compiler flags provided to stdenv. - inherit pname version; - strictDeps = true; - - outputs = with releaseFeaturesAttrs; - [ "out" ] - ++ optionals hasBin [ "bin" ] - ++ optionals hasLib [ "lib" ] - ++ optionals hasStatic [ "static" ] - ++ optionals hasDev [ "dev" ] - ++ optionals hasDoc [ "doc" ] - ++ optionals hasSample [ "sample" ]; - - src = fetchurl { - url = "https://developer.download.nvidia.com/compute/cuda/redist/${releaseAttrs.relative_path}"; - inherit (releaseAttrs) sha256; - }; - - # We do need some other phases, like configurePhase, so the multiple-output setup hook works. 
- dontBuild = true; - - nativeBuildInputs = [ - autoPatchelfHook - # This hook will make sure libcuda can be found - # in typically /lib/opengl-driver by adding that - # directory to the rpath of all ELF binaries. - # Check e.g. with `patchelf --print-rpath path/to/my/binary - autoAddOpenGLRunpathHook - markForCudatoolkitRootHook - ]; - - buildInputs = [ - # autoPatchelfHook will search for a libstdc++ and we're giving it - # one that is compatible with the rest of nixpkgs, even when - # nvcc forces us to use an older gcc - # NB: We don't actually know if this is the right thing to do - stdenv.cc.cc.lib - ]; - - # Picked up by autoPatchelf - # Needed e.g. for libnvrtc to locate (dlopen) libnvrtc-builtins - appendRunpaths = [ - "$ORIGIN" - ]; - - installPhase = with releaseFeaturesAttrs; - # Pre-install hook - '' - runHook preInstall - '' - # doc and dev have special output handling. Other outputs need to be moved to their own - # output. - # Note that moveToOutput operates on all outputs: - # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L105-L107 - + '' - mkdir -p "$out" - rm LICENSE - mv * "$out" - '' - # Handle bin, which defaults to out - + optionalString hasBin '' - moveToOutput "bin" "$bin" - '' - # Handle lib, which defaults to out - + optionalString hasLib '' - moveToOutput "lib" "$lib" - '' - # Handle static libs, which isn't handled by the setup hook - + optionalString hasStatic '' - moveToOutput "**/*.a" "$static" - '' - # Handle samples, which isn't handled by the setup hook - + optionalString hasSample '' - moveToOutput "samples" "$sample" - '' - # Post-install hook - + '' - runHook postInstall - ''; - - # The out output leverages the same functionality which backs the `symlinkJoin` function in - # Nixpkgs: - # https://github.com/NixOS/nixpkgs/blob/d8b2a92df48f9b08d68b0132ce7adfbdbc1fbfac/pkgs/build-support/trivial-builders/default.nix#L510 - # - # That should allow us to emulate "fat" default outputs without having to actually create them. - # - # It is important that this run after the autoPatchelfHook, otherwise the symlinks in out will reference libraries in lib, creating a circular dependency. - postPhases = [ "postPatchelf" ]; - # For each output, create a symlink to it in the out output. - # NOTE: We must recreate the out output here, because the setup hook will have deleted it - # if it was empty. - # NOTE: Do not use optionalString based on whether `outputs` contains only `out` -- phases - # which are empty strings are skipped/unset and result in errors of the form "command not - # found: ". - postPatchelf = '' - mkdir -p "$out" - for output in $outputs; do - if [ "$output" = "out" ]; then - continue - fi - ${getExe lndir} "''${!output}" "$out" - done - ''; - - # Make the CUDA-patched stdenv available - passthru.stdenv = backendStdenv; - - # Setting propagatedBuildInputs to false will prevent outputs known to the multiple-outputs - # from depending on `out` by default. - # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L196 - # Indeed, we want to do the opposite -- fat "out" outputs that contain all the other outputs. - propagatedBuildOutputs = false; - - # By default, if the dev output exists it just uses that. - # However, because we disabled propagatedBuildOutputs, dev doesn't contain libraries or - # anything of the sort. 
To remedy this, we set outputSpecified to true, and use - # outputsToInstall, which tells Nix which outputs to use when the package name is used - # unqualified (that is, without an explicit output). - outputSpecified = true; - - meta = { - inherit platforms; - description = "${description}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}"; - license = lib.licenses.nvidiaCudaRedist // { - url = "https://developer.download.nvidia.com/compute/cuda/redist/${releaseAttrs.license_path or "${pname}/LICENSE.txt"}"; - }; - sourceProvenance = [ lib.sourceTypes.binaryNativeCode ]; - maintainers = lib.teams.cuda.members; - # Force the use of the default, fat output by default (even though `dev` exists, which - # causes Nix to prefer that output over the others if outputSpecified isn't set). - outputsToInstall = [ "out" ]; - }; -}) diff --git a/pkgs/development/compilers/cudatoolkit/redist/extension.nix b/pkgs/development/compilers/cudatoolkit/redist/extension.nix deleted file mode 100644 index ea6fd581cde4f..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/redist/extension.nix +++ /dev/null @@ -1,139 +0,0 @@ -# Type Aliases -# -# ReleaseAttrs : { -# "relative_path" : String, -# "sha256" : String, -# "md5" : String, -# "size" : String, -# } -# -# NOTE: PackageAttrs must have at least one of the arches. -# PackageAttrs : { -# "name" : String, -# "license" : String, -# "version" : String, -# "license_path" : None | String, -# "linux-aarch64" : None | ReleaseAttrs, -# "linux-ppc64le" : None | ReleaseAttrs, -# "linux-sbsa" : None | ReleaseAttrs, -# "linux-x86_64" : None | ReleaseAttrs, -# "windows-x86_64" : None | ReleaseAttrs, -# } -# -# ReleaseFeaturesAttrs : { -# "hasBin" : Boolean, -# "hasDev" : Boolean, -# "hasDoc" : Boolean, -# "hasLib" : Boolean, -# "hasOut" : Boolean, -# "hasSample" : Boolean, -# "hasStatic" : Boolean, -# "rootDirs" : List String, -# } -# -# NOTE: PackageFeatureAttrs must have at least one of the arches. -# PackageFeatureAttrs : { -# "linux-aarch64" : None | ReleaseFeaturesAttrs, -# "linux-ppc64le" : None | ReleaseFeaturesAttrs, -# "linux-sbsa" : None | ReleaseFeaturesAttrs, -# "linux-x86_64" : None | ReleaseFeaturesAttrs, -# "windows-x86_64" : None | ReleaseFeaturesAttrs, -# } -# -final: prev: -let - # NOTE: We use hasAttr throughout instead of the (?) operator because hasAttr does not require - # us to interpolate our variables into strings (like ${attrName}). - inherit (builtins) attrNames concatMap hasAttr listToAttrs removeAttrs; - inherit (final) callPackage; - inherit (prev) cudaVersion; - inherit (prev.lib.attrsets) nameValuePair optionalAttrs; - inherit (prev.lib.lists) optionals; - inherit (prev.lib.trivial) flip importJSON pipe; - - # Manifest files for CUDA redistributables (aka redist). These can be found at - # https://developer.download.nvidia.com/compute/cuda/redist/ - # Maps a cuda version to the specific version of the manifest. - cudaVersionMap = { - "11.4" = "11.4.4"; - "11.5" = "11.5.2"; - "11.6" = "11.6.2"; - "11.7" = "11.7.0"; - "11.8" = "11.8.0"; - "12.0" = "12.0.1"; - "12.1" = "12.1.1"; - "12.2" = "12.2.0"; - }; - - # Check if the current CUDA version is supported. - cudaVersionMappingExists = hasAttr cudaVersion cudaVersionMap; - - # Maps a cuda version to its manifest files. - # The manifest itself is from NVIDIA, but the features manifest is generated - # by us ahead of time and allows us to split pacakges into multiple outputs. 
- # Package names (e.g., "cuda_cccl") are mapped to their attributes or features. - # Since we map each attribute to a package name, we need to make sure to get rid of meta - # attributes included in the manifest. Currently, these are any of the following: - # - release_date - # - release_label - # - release_product - redistManifests = - let - # Remove meta attributes from the manifest - # removeAttrs : AttrSet String b -> Attr String b - removeMetaAttrs = flip removeAttrs [ "release_date" "release_label" "release_product" ]; - # processManifest : Path -> Attr Set (String PackageAttrs) - processManifest = flip pipe [ importJSON removeMetaAttrs ]; - # fullCudaVersion : String - fullCudaVersion = cudaVersionMap.${cudaVersion}; - in - { - # features : Attr Set (String PackageFeatureAttrs) - features = processManifest (./manifests + "/redistrib_features_${fullCudaVersion}.json"); - # manifest : Attr Set (String PackageAttrs) - manifest = processManifest (./manifests + "/redistrib_${fullCudaVersion}.json"); - }; - - # Function to build a single redist package - buildRedistPackage = callPackage ./build-cuda-redist-package.nix { }; - - # Function that builds all redist packages given manifests - buildRedistPackages = { features, manifest }: - let - wrapper = pname: - let - # Get the redist architectures the package provides distributables for - packageAttrs = manifest.${pname}; - - # Check if supported - # TODO(@connorbaker): Currently hardcoding x86_64-linux as the only supported platform. - isSupported = packageAttrs ? linux-x86_64; - - # Build the derivation - drv = buildRedistPackage { - inherit pname; - # TODO(@connorbaker): We currently discard the license attribute. - inherit (manifest.${pname}) version; - description = manifest.${pname}.name; - platforms = [ "x86_64-linux" ]; - releaseAttrs = manifest.${pname}.linux-x86_64; - releaseFeaturesAttrs = features.${pname}.linux-x86_64; - }; - - # Wrap in an optional so we can filter out the empty lists created by unsupported - # packages with concatMap. - wrapped = optionals isSupported [ (nameValuePair pname drv) ]; - in - wrapped; - - # concatMap provides us an easy way to filter out packages for unsupported platforms. - # We wrap the buildRedistPackage call in a list to prevent errors when the package is not - # supported (by returning an empty list). - redistPackages = listToAttrs (concatMap wrapper (attrNames manifest)); - in - redistPackages; - - # All redistributable packages for the current CUDA version - redistPackages = optionalAttrs cudaVersionMappingExists (buildRedistPackages redistManifests); -in -redistPackages diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix deleted file mode 100644 index 71e70e8d7b704..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix +++ /dev/null @@ -1,119 +0,0 @@ -final: prev: -let - inherit (prev) lib pkgs; - cudaVersionOlder = lib.versionOlder final.cudaVersion; - cudaVersionAtLeast = lib.versionAtLeast final.cudaVersion; -in -(lib.filterAttrs (attr: _: (prev ? "${attr}")) { - ### Overrides to fix the components of cudatoolkit-redist - - # Attributes that don't exist in the previous set are removed. - # That means only overrides can go here, and not new expressions! 
- - libcufile = prev.libcufile.overrideAttrs (oldAttrs: { - buildInputs = oldAttrs.buildInputs ++ [ - final.libcublas.lib - pkgs.numactl - pkgs.rdma-core - ]; - # libcuda needs to be resolved during runtime - autoPatchelfIgnoreMissingDeps = - ["libcuda.so.1"] - # Before 12.0 libcufile depends on itself for some reason. - ++ lib.optionals (cudaVersionOlder "12.0") [ - "libcufile.so.0" - ]; - }); - - libcusolver = final.addBuildInputs prev.libcusolver ( - # Always depends on this - [final.libcublas.lib] - # Dependency from 12.0 and on - ++ lib.optionals (cudaVersionAtLeast "12.0") [ - final.libnvjitlink.lib - ] - # Dependency from 12.1 and on - ++ lib.optionals (cudaVersionAtLeast "12.1") [ - final.libcusparse.lib - ] - ); - - libcusparse = final.addBuildInputs prev.libcusparse ( - lib.optionals (cudaVersionAtLeast "12.0") [ - final.libnvjitlink.lib - ] - ); - - cuda_gdb = final.addBuildInputs prev.cuda_gdb ( - # x86_64 only needs gmp from 12.0 and on - lib.optionals (cudaVersionAtLeast "12.0") [ - pkgs.gmp - ] - ); - - cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs: { - propagatedBuildInputs = [ - final.setupCudaHook - ]; - - meta = (oldAttrs.meta or { }) // { - mainProgram = "nvcc"; - }; - }); - - cuda_nvprof = prev.cuda_nvprof.overrideAttrs (oldAttrs: { - nativeBuildInputs = oldAttrs.nativeBuildInputs ++ [ pkgs.addOpenGLRunpath ]; - buildInputs = oldAttrs.buildInputs ++ [ final.cuda_cupti.lib ]; - # libcuda needs to be resolved during runtime - autoPatchelfIgnoreMissingDeps = ["libcuda.so.1"]; - }); - - cuda_demo_suite = final.addBuildInputs prev.cuda_demo_suite [ - pkgs.freeglut - pkgs.libGLU - pkgs.libglvnd - pkgs.mesa - final.libcufft.lib - final.libcurand.lib - ]; - - nsight_compute = prev.nsight_compute.overrideAttrs (oldAttrs: { - nativeBuildInputs = oldAttrs.nativeBuildInputs - ++ (if (lib.versionOlder prev.nsight_compute.version "2022.2.0") - then [ pkgs.qt5.wrapQtAppsHook ] - else [ pkgs.qt6.wrapQtAppsHook ]); - buildInputs = oldAttrs.buildInputs - ++ (if (lib.versionOlder prev.nsight_compute.version "2022.2.0") - then [ pkgs.qt5.qtwebview ] - else [ pkgs.qt6.qtwebview ]); - }); - - nsight_systems = prev.nsight_systems.overrideAttrs (oldAttrs: { - nativeBuildInputs = oldAttrs.nativeBuildInputs ++ [ - pkgs.addOpenGLRunpath - pkgs.qt5.wrapQtAppsHook - ]; - buildInputs = oldAttrs.buildInputs ++ [ - pkgs.alsa-lib - pkgs.e2fsprogs - pkgs.nss - pkgs.numactl - pkgs.pulseaudio - pkgs.wayland - pkgs.xorg.libXcursor - pkgs.xorg.libXdamage - pkgs.xorg.libXrandr - pkgs.xorg.libXtst - ]; - # libcuda needs to be resolved during runtime - autoPatchelfIgnoreMissingDeps = true; - }); - - nvidia_driver = prev.nvidia_driver.overrideAttrs (oldAttrs: { - # libcuda needs to be resolved during runtime - autoPatchelfIgnoreMissingDeps = ["libcuda.so.1"]; - # No need to support this package as we have drivers already - # in linuxPackages. 
- meta.broken = true; - }); -}) diff --git a/pkgs/development/compilers/cudatoolkit/saxpy/CMakeLists.txt b/pkgs/development/compilers/cudatoolkit/saxpy/CMakeLists.txt deleted file mode 100644 index a6954e6e8bee2..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/saxpy/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -cmake_minimum_required(VERSION 3.25) -project(saxpy LANGUAGES CXX CUDA) - -find_package(CUDAToolkit REQUIRED COMPONENTS cudart cublas) - -add_executable(saxpy saxpy.cu) -target_link_libraries(saxpy PUBLIC CUDA::cublas CUDA::cudart m) -target_compile_features(saxpy PRIVATE cxx_std_14) -target_compile_options(saxpy PRIVATE $<$: - --expt-relaxed-constexpr>) - -install(TARGETS saxpy) diff --git a/pkgs/development/compilers/cudatoolkit/saxpy/default.nix b/pkgs/development/compilers/cudatoolkit/saxpy/default.nix deleted file mode 100644 index 2da6da29004dc..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/saxpy/default.nix +++ /dev/null @@ -1,52 +0,0 @@ -{ autoAddOpenGLRunpathHook -, backendStdenv -, cmake -, cuda_cccl ? null -, cuda_cudart ? null -, cudaFlags -, cuda_nvcc ? null -, cudatoolkit ? null -, lib -, libcublas ? null -, setupCudaHook -, stdenv -}: - -backendStdenv.mkDerivation { - pname = "saxpy"; - version = "unstable-2023-07-11"; - - src = ./.; - - buildInputs = lib.optionals (cuda_cudart != null) [ - libcublas - cuda_cudart - cuda_cccl - ] ++ lib.optionals (cuda_cudart == null) [ - cudatoolkit - ]; - nativeBuildInputs = [ - cmake - - # Alternatively, we could remove the propagated hook from cuda_nvcc and add - # directly: - # setupCudaHook - autoAddOpenGLRunpathHook - ] ++ lib.optionals (cuda_nvcc != null) [ - cuda_nvcc - ] ++ lib.optionals (cuda_nvcc == null) [ - cudatoolkit - ]; - - cmakeFlags = [ - "-DCMAKE_VERBOSE_MAKEFILE=ON" - "-DCMAKE_CUDA_ARCHITECTURES=${with cudaFlags; builtins.concatStringsSep ";" (map dropDot cudaCapabilities)}" - ]; - - meta = { - description = "A simple (Single-precision AX Plus Y) FindCUDAToolkit.cmake example for testing cross-compilation"; - license = lib.licenses.mit; - maintainers = lib.teams.cuda.members; - platforms = lib.platforms.unix; - }; -} diff --git a/pkgs/development/compilers/cudatoolkit/saxpy/saxpy.cu b/pkgs/development/compilers/cudatoolkit/saxpy/saxpy.cu deleted file mode 100644 index 912a6d1647b14..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/saxpy/saxpy.cu +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include - -#include - -static inline void check(cudaError_t err, const char *context) { - if (err != cudaSuccess) { - fprintf(stderr, "CUDA error at %s: %s\n", context, cudaGetErrorString(err)); - std::exit(EXIT_FAILURE); - } -} - -#define CHECK(x) check(x, #x) - -__global__ void saxpy(int n, float a, float *x, float *y) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < n) - y[i] = a * x[i] + y[i]; -} - -int main(void) { - setbuf(stderr, NULL); - fprintf(stderr, "Start\n"); - - int rtVersion, driverVersion; - CHECK(cudaRuntimeGetVersion(&rtVersion)); - CHECK(cudaDriverGetVersion(&driverVersion)); - - fprintf(stderr, "Runtime version: %d\n", rtVersion); - fprintf(stderr, "Driver version: %d\n", driverVersion); - - constexpr int N = 1 << 10; - - std::vector xHost(N), yHost(N); - for (int i = 0; i < N; i++) { - xHost[i] = 1.0f; - yHost[i] = 2.0f; - } - - fprintf(stderr, "Host memory initialized, copying to the device\n"); - fflush(stderr); - - float *xDevice, *yDevice; - CHECK(cudaMalloc(&xDevice, N * sizeof(float))); - CHECK(cudaMalloc(&yDevice, N * sizeof(float))); - - 
CHECK(cudaMemcpy(xDevice, xHost.data(), N * sizeof(float), - cudaMemcpyHostToDevice)); - CHECK(cudaMemcpy(yDevice, yHost.data(), N * sizeof(float), - cudaMemcpyHostToDevice)); - fprintf(stderr, "Scheduled a cudaMemcpy, calling the kernel\n"); - - saxpy<<<(N + 255) / 256, 256>>>(N, 2.0f, xDevice, yDevice); - fprintf(stderr, "Scheduled a kernel call\n"); - CHECK(cudaGetLastError()); - - CHECK(cudaMemcpy(yHost.data(), yDevice, N * sizeof(float), - cudaMemcpyDeviceToHost)); - - float maxError = 0.0f; - for (int i = 0; i < N; i++) - maxError = max(maxError, abs(yHost[i] - 4.0f)); - fprintf(stderr, "Max error: %f\n", maxError); - - CHECK(cudaFree(xDevice)); - CHECK(cudaFree(yDevice)); -} diff --git a/pkgs/development/compilers/cudatoolkit/stdenv.nix b/pkgs/development/compilers/cudatoolkit/stdenv.nix deleted file mode 100644 index 95e783a682bff..0000000000000 --- a/pkgs/development/compilers/cudatoolkit/stdenv.nix +++ /dev/null @@ -1,33 +0,0 @@ -{ lib -, nixpkgsCompatibleLibstdcxx -, nvccCompatibleCC -, overrideCC -, stdenv -, wrapCCWith -}: - -let - cc = wrapCCWith - { - cc = nvccCompatibleCC; - - # This option is for clang's libcxx, but we (ab)use it for gcc's libstdc++. - # Note that libstdc++ maintains forward-compatibility: if we load a newer - # libstdc++ into the process, we can still use libraries built against an - # older libstdc++. This, in practice, means that we should use libstdc++ from - # the same stdenv that the rest of nixpkgs uses. - # We currently do not try to support anything other than gcc and linux. - libcxx = nixpkgsCompatibleLibstdcxx; - }; - cudaStdenv = overrideCC stdenv cc; - passthruExtra = { - inherit nixpkgsCompatibleLibstdcxx; - # cc already exposed - }; - assertCondition = true; -in -lib.extendDerivation - assertCondition - passthruExtra - cudaStdenv - diff --git a/pkgs/development/cuda-modules/backend-stdenv.nix b/pkgs/development/cuda-modules/backend-stdenv.nix new file mode 100644 index 0000000000000..10fedd1e6f271 --- /dev/null +++ b/pkgs/development/cuda-modules/backend-stdenv.nix @@ -0,0 +1,39 @@ +{ + lib, + nvccCompatibilities, + cudaVersion, + buildPackages, + overrideCC, + stdenv, + wrapCCWith, +}: +let + gccMajorVersion = nvccCompatibilities.${cudaVersion}.gccMaxMajorVersion; + # We use buildPackages (= pkgsBuildHost) because we look for a gcc that + # runs on our build platform, and that produces executables for the host + # platform (= platform on which we deploy and run the downstream packages). + # The target platform of buildPackages.gcc is our host platform, so its + # .lib output should be the libstdc++ we want to be writing in the runpaths + # Cf. https://github.com/NixOS/nixpkgs/pull/225661#discussion_r1164564576 + nixpkgsCompatibleLibstdcxx = buildPackages.gcc.cc.lib; + nvccCompatibleCC = buildPackages."gcc${gccMajorVersion}".cc; + + cc = wrapCCWith { + cc = nvccCompatibleCC; + + # This option is for clang's libcxx, but we (ab)use it for gcc's libstdc++. + # Note that libstdc++ maintains forward-compatibility: if we load a newer + # libstdc++ into the process, we can still use libraries built against an + # older libstdc++. This, in practice, means that we should use libstdc++ from + # the same stdenv that the rest of nixpkgs uses. + # We currently do not try to support anything other than gcc and linux. 
+ libcxx = nixpkgsCompatibleLibstdcxx; + }; + cudaStdenv = overrideCC stdenv cc; + passthruExtra = { + inherit nixpkgsCompatibleLibstdcxx; + # cc already exposed + }; + assertCondition = true; +in +lib.extendDerivation assertCondition passthruExtra cudaStdenv diff --git a/pkgs/development/cuda-modules/cuda/extension.nix b/pkgs/development/cuda-modules/cuda/extension.nix new file mode 100644 index 0000000000000..20ec90d058468 --- /dev/null +++ b/pkgs/development/cuda-modules/cuda/extension.nix @@ -0,0 +1,101 @@ +{cudaVersion, lib}: +let + inherit (lib) attrsets modules trivial; + redistName = "cuda"; + + # Manifest files for CUDA redistributables (aka redist). These can be found at + # https://developer.download.nvidia.com/compute/cuda/redist/ + # Maps a cuda version to the specific version of the manifest. + cudaVersionMap = { + "11.4" = "11.4.4"; + "11.5" = "11.5.2"; + "11.6" = "11.6.2"; + "11.7" = "11.7.1"; + "11.8" = "11.8.0"; + "12.0" = "12.0.1"; + "12.1" = "12.1.1"; + "12.2" = "12.2.2"; + }; + + # Check if the current CUDA version is supported. + cudaVersionMappingExists = builtins.hasAttr cudaVersion cudaVersionMap; + + # fullCudaVersion : String + fullCudaVersion = cudaVersionMap.${cudaVersion}; + + evaluatedModules = modules.evalModules { + modules = [ + ../modules + # We need to nest the manifests in a config.cuda.manifests attribute so the + # module system can evaluate them. + { + cuda.manifests = { + redistrib = trivial.importJSON (./manifests + "/redistrib_${fullCudaVersion}.json"); + feature = trivial.importJSON (./manifests + "/feature_${fullCudaVersion}.json"); + }; + } + ]; + }; + + # Generally we prefer to do things involving getting attribute names with feature_manifest instead + # of redistrib_manifest because the feature manifest will have *only* the redist architecture + # names as the keys, whereas the redistrib manifest will also have things like version, name, license, + # and license_path. + featureManifest = evaluatedModules.config.cuda.manifests.feature; + redistribManifest = evaluatedModules.config.cuda.manifests.redistrib; + + # Builder function which builds a single redist package for a given platform. + # buildRedistPackage : callPackage -> PackageName -> Derivation + buildRedistPackage = + callPackage: pname: + let + redistribRelease = redistribManifest.${pname}; + featureRelease = featureManifest.${pname}; + drv = + (callPackage ../generic-builders/manifest.nix { + # We pass the whole release to the builder because it has logic to handle + # the case we're trying to build on an unsupported platform. + inherit + pname + redistName + redistribRelease + featureRelease + ; + }).overrideAttrs + ( + prevAttrs: { + # Add the package-specific license. + meta = prevAttrs.meta // { + license = + let + licensePath = + if redistribRelease.license_path != null then + redistribRelease.license_path + else + "${pname}/LICENSE.txt"; + url = "https://developer.download.nvidia.com/compute/cuda/redist/${licensePath}"; + in + lib.licenses.nvidiaCudaRedist // {inherit url;}; + }; + } + ); + in + drv; + + # Build all the redist packages given final and prev. + redistPackages = + final: _prev: + # Wrap the whole thing in an optionalAttrs so we can return an empty set if the CUDA version + # is not supported. + # NOTE: We cannot include the call to optionalAttrs *in* the pipe as we would strictly evaluate the + # attrNames before we check if the CUDA version is supported. 
+ attrsets.optionalAttrs cudaVersionMappingExists ( + trivial.pipe featureManifest [ + # Get all the package names + builtins.attrNames + # Build the redist packages + (trivial.flip attrsets.genAttrs (buildRedistPackage final.callPackage)) + ] + ); +in +redistPackages diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix new file mode 100644 index 0000000000000..061d5da16bb59 --- /dev/null +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -0,0 +1,129 @@ +{cudaVersion, lib}: +let + inherit (lib) attrsets lists strings; + # cudaVersionOlder : Version -> Boolean + cudaVersionOlder = strings.versionOlder cudaVersion; + # cudaVersionAtLeast : Version -> Boolean + cudaVersionAtLeast = strings.versionAtLeast cudaVersion; + + addBuildInputs = + drv: buildInputs: + drv.overrideAttrs (prevAttrs: {buildInputs = prevAttrs.buildInputs ++ buildInputs;}); +in +# NOTE: Filter out attributes that are not present in the previous version of +# the package set. This is necessary to prevent the appearance of attributes +# like `cuda_nvcc` in `cudaPackages_10_0, which predates redistributables. +final: prev: +attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) { + libcufile = prev.libcufile.overrideAttrs ( + prevAttrs: { + buildInputs = prevAttrs.buildInputs ++ [ + final.libcublas.lib + final.pkgs.numactl + final.pkgs.rdma-core + ]; + # Before 11.7 libcufile depends on itself for some reason. + env.autoPatchelfIgnoreMissingDeps = + prevAttrs.env.autoPatchelfIgnoreMissingDeps + + strings.optionalString (cudaVersionOlder "11.7") " libcufile.so.0"; + } + ); + + libcusolver = addBuildInputs prev.libcusolver ( + # Always depends on this + [final.libcublas.lib] + # Dependency from 12.0 and on + ++ lists.optionals (cudaVersionAtLeast "12.0") [final.libnvjitlink.lib] + # Dependency from 12.1 and on + ++ lists.optionals (cudaVersionAtLeast "12.1") [final.libcusparse.lib] + ); + + libcusparse = addBuildInputs prev.libcusparse ( + lists.optionals (cudaVersionAtLeast "12.0") [final.libnvjitlink.lib] + ); + + cuda_compat = prev.cuda_compat.overrideAttrs ( + prevAttrs: { + env.autoPatchelfIgnoreMissingDeps = + prevAttrs.env.autoPatchelfIgnoreMissingDeps + " libnvrm_gpu.so libnvrm_mem.so"; + # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices. 
+ brokenConditions = prevAttrs.brokenConditions // { + "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = + !final.flags.isJetsonBuild; + }; + } + ); + + cuda_gdb = addBuildInputs prev.cuda_gdb ( + # x86_64 only needs gmp from 12.0 and on + lists.optionals (cudaVersionAtLeast "12.0") [final.pkgs.gmp] + ); + + cuda_nvcc = prev.cuda_nvcc.overrideAttrs ( + oldAttrs: { + propagatedBuildInputs = [final.setupCudaHook]; + + meta = (oldAttrs.meta or {}) // { + mainProgram = "nvcc"; + }; + } + ); + + cuda_nvprof = prev.cuda_nvprof.overrideAttrs ( + prevAttrs: {buildInputs = prevAttrs.buildInputs ++ [final.cuda_cupti.lib];} + ); + + cuda_demo_suite = addBuildInputs prev.cuda_demo_suite [ + final.pkgs.freeglut + final.pkgs.libGLU + final.pkgs.libglvnd + final.pkgs.mesa + final.libcufft.lib + final.libcurand.lib + ]; + + nsight_compute = prev.nsight_compute.overrideAttrs ( + prevAttrs: { + nativeBuildInputs = + prevAttrs.nativeBuildInputs + ++ ( + if (strings.versionOlder prev.nsight_compute.version "2022.2.0") then + [final.pkgs.qt5.wrapQtAppsHook] + else + [final.pkgs.qt6.wrapQtAppsHook] + ); + buildInputs = + prevAttrs.buildInputs + ++ ( + if (strings.versionOlder prev.nsight_compute.version "2022.2.0") then + [final.pkgs.qt5.qtwebview] + else + [final.pkgs.qt6.qtwebview] + ); + } + ); + + nsight_systems = prev.nsight_systems.overrideAttrs ( + prevAttrs: { + nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [final.pkgs.qt5.wrapQtAppsHook]; + buildInputs = prevAttrs.buildInputs ++ [ + final.pkgs.alsa-lib + final.pkgs.e2fsprogs + final.pkgs.nss + final.pkgs.numactl + final.pkgs.pulseaudio + final.pkgs.wayland + final.pkgs.xorg.libXcursor + final.pkgs.xorg.libXdamage + final.pkgs.xorg.libXrandr + final.pkgs.xorg.libXtst + ]; + } + ); + + nvidia_driver = prev.nvidia_driver.overrideAttrs { + # No need to support this package as we have drivers already + # in linuxPackages. + meta.broken = true; + }; +} diff --git a/pkgs/development/cuda-modules/cudatoolkit/default.nix b/pkgs/development/cuda-modules/cudatoolkit/default.nix new file mode 100644 index 0000000000000..e0868e2b1c2cb --- /dev/null +++ b/pkgs/development/cuda-modules/cudatoolkit/default.nix @@ -0,0 +1,374 @@ +{ + cudaVersion, + runPatches ? [], + autoPatchelfHook, + autoAddOpenGLRunpathHook, + addOpenGLRunpath, + alsa-lib, + curlMinimal, + expat, + fetchurl, + fontconfig, + freetype, + gdk-pixbuf, + glib, + glibc, + gst_all_1, + gtk2, + lib, + libxkbcommon, + libkrb5, + krb5, + makeWrapper, + markForCudatoolkitRootHook, + ncurses5, + numactl, + nss, + patchelf, + perl, + python3, # FIXME: CUDAToolkit 10 may still need python27 + pulseaudio, + setupCudaHook, + stdenv, + backendStdenv, # E.g. gcc11Stdenv, set in extension.nix + unixODBC, + wayland, + xorg, + zlib, + freeglut, + libGLU, + libsForQt5, + libtiff, + qt6Packages, + qt6, + rdma-core, + ucx, + rsync, +}: + +let + # Version info for the classic cudatoolkit packages that contain everything that is in redist. 
+ releases = builtins.import ./releases.nix; + release = releases.${cudaVersion}; +in + +backendStdenv.mkDerivation rec { + pname = "cudatoolkit"; + inherit (release) version; + inherit runPatches; + + dontPatchELF = true; + dontStrip = true; + + src = fetchurl {inherit (release) url sha256;}; + + outputs = [ + "out" + "lib" + "doc" + ]; + + nativeBuildInputs = + [ + perl + makeWrapper + rsync + addOpenGLRunpath + autoPatchelfHook + autoAddOpenGLRunpathHook + markForCudatoolkitRootHook + ] + ++ lib.optionals (lib.versionOlder version "11") [libsForQt5.wrapQtAppsHook] + ++ lib.optionals (lib.versionAtLeast version "11.8") [qt6Packages.wrapQtAppsHook]; + depsTargetTargetPropagated = [setupCudaHook]; + buildInputs = + lib.optionals (lib.versionOlder version "11") [ + libsForQt5.qt5.qtwebengine + freeglut + libGLU + ] + ++ [ + # To get $GDK_PIXBUF_MODULE_FILE via setup-hook + gdk-pixbuf + + # For autoPatchelf + ncurses5 + expat + python3 + zlib + glibc + xorg.libX11 + xorg.libXext + xorg.libXrender + xorg.libXt + xorg.libXtst + xorg.libXi + xorg.libXext + xorg.libXdamage + xorg.libxcb + xorg.xcbutilimage + xorg.xcbutilrenderutil + xorg.xcbutilwm + xorg.xcbutilkeysyms + pulseaudio + libxkbcommon + libkrb5 + krb5 + gtk2 + glib + fontconfig + freetype + numactl + nss + unixODBC + alsa-lib + wayland + ] + ++ lib.optionals (lib.versionAtLeast version "11.8") [ + (lib.getLib libtiff) + qt6Packages.qtwayland + rdma-core + (ucx.override {enableCuda = false;}) # Avoid infinite recursion + xorg.libxshmfence + xorg.libxkbfile + ] + ++ (lib.optionals (lib.versionAtLeast version "12") ( + map lib.getLib ([ + # Used by `/target-linux-x64/CollectX/clx` and `/target-linux-x64/CollectX/libclx_api.so` for: + # - `libcurl.so.4` + curlMinimal + + # Used by `/host-linux-x64/Scripts/WebRTCContainer/setup/neko/server/bin/neko` + gst_all_1.gstreamer + gst_all_1.gst-plugins-base + ]) + ++ ( + with qt6; [ + qtmultimedia + qttools + qtpositioning + qtscxml + qtsvg + qtwebchannel + qtwebengine + ] + ) + )); + + # Prepended to runpaths by autoPatchelf. + # The order inherited from older rpath preFixup code + runtimeDependencies = [ + (placeholder "lib") + (placeholder "out") + "${placeholder "out"}/nvvm" + # NOTE: use the same libstdc++ as the rest of nixpkgs, not from backendStdenv + "${lib.getLib stdenv.cc.cc}/lib64" + "${placeholder "out"}/jre/lib/amd64/jli" + "${placeholder "out"}/lib64" + "${placeholder "out"}/nvvm/lib64" + ]; + + autoPatchelfIgnoreMissingDeps = [ + # This is the hardware-dependent userspace driver that comes from + # nvidia_x11 package. It must be deployed at runtime in + # /run/opengl-driver/lib or pointed at by LD_LIBRARY_PATH variable, rather + # than pinned in runpath + "libcuda.so.1" + + # The krb5 expression ships libcom_err.so.3 but cudatoolkit asks for the + # older + # This dependency is asked for by target-linux-x64/CollectX/RedHat/x86_64/libssl.so.10 + # - do we even want to use nvidia-shipped libssl? + "libcom_err.so.2" + ]; + + preFixup = + if lib.versionOlder version "11" then + '' + ${lib.getExe' patchelf "patchelf"} $out/targets/*/lib/libnvrtc.so --add-needed libnvrtc-builtins.so + '' + else + '' + ${lib.getExe' patchelf "patchelf"} $out/lib64/libnvrtc.so --add-needed libnvrtc-builtins.so + ''; + + unpackPhase = '' + sh $src --keep --noexec + + ${lib.optionalString (lib.versionOlder version "10.1") '' + cd pkg/run_files + sh cuda-linux*.run --keep --noexec + sh cuda-samples*.run --keep --noexec + mv pkg ../../$(basename $src) + cd ../.. 
+ rm -rf pkg + + for patch in $runPatches; do + sh $patch --keep --noexec + mv pkg $(basename $patch) + done + ''} + ''; + + installPhase = + '' + runHook preInstall + mkdir $out + ${lib.optionalString (lib.versionOlder version "10.1") '' + cd $(basename $src) + export PERL5LIB=. + perl ./install-linux.pl --prefix="$out" + cd .. + for patch in $runPatches; do + cd $(basename $patch) + perl ./install_patch.pl --silent --accept-eula --installdir="$out" + cd .. + done + ''} + ${lib.optionalString (lib.versionAtLeast version "10.1" && lib.versionOlder version "11") '' + cd pkg/builds/cuda-toolkit + mv * $out/ + ''} + ${lib.optionalString (lib.versionAtLeast version "11") '' + mkdir -p $out/bin $out/lib64 $out/include $doc + for dir in pkg/builds/* pkg/builds/cuda_nvcc/nvvm pkg/builds/cuda_cupti/extras/CUPTI; do + if [ -d $dir/bin ]; then + mv $dir/bin/* $out/bin + fi + if [ -d $dir/doc ]; then + (cd $dir/doc && find . -type d -exec mkdir -p $doc/\{} \;) + (cd $dir/doc && find . \( -type f -o -type l \) -exec mv \{} $doc/\{} \;) + fi + if [ -L $dir/include ] || [ -d $dir/include ]; then + (cd $dir/include && find . -type d -exec mkdir -p $out/include/\{} \;) + (cd $dir/include && find . \( -type f -o -type l \) -exec mv \{} $out/include/\{} \;) + fi + if [ -L $dir/lib64 ] || [ -d $dir/lib64 ]; then + (cd $dir/lib64 && find . -type d -exec mkdir -p $out/lib64/\{} \;) + (cd $dir/lib64 && find . \( -type f -o -type l \) -exec mv \{} $out/lib64/\{} \;) + fi + done + mv pkg/builds/cuda_nvcc/nvvm $out/nvvm + + mv pkg/builds/cuda_sanitizer_api $out/cuda_sanitizer_api + ln -s $out/cuda_sanitizer_api/compute-sanitizer/compute-sanitizer $out/bin/compute-sanitizer + + mv pkg/builds/nsight_systems/target-linux-x64 $out/target-linux-x64 + mv pkg/builds/nsight_systems/host-linux-x64 $out/host-linux-x64 + rm $out/host-linux-x64/libstdc++.so* + ''} + ${ + lib.optionalString (lib.versionAtLeast version "11.8") + # error: auto-patchelf could not satisfy dependency libtiff.so.5 wanted by /nix/store/.......-cudatoolkit-12.0.1/host-linux-x64/Plugins/imageformats/libqtiff.so + # we only ship libtiff.so.6, so let's use qt plugins built by Nix. + # TODO: don't copy, come up with a symlink-based "merge" + '' + rsync ${lib.getLib qt6Packages.qtimageformats}/lib/qt-6/plugins/ $out/host-linux-x64/Plugins/ -aP + '' + } + + rm -f $out/tools/CUDA_Occupancy_Calculator.xls # FIXME: why? + + ${lib.optionalString (lib.versionOlder version "10.1") '' + # let's remove the 32-bit libraries, they confuse the lib64->lib mover + rm -rf $out/lib + ''} + + ${lib.optionalString (lib.versionAtLeast version "12.0") '' + rm $out/host-linux-x64/libQt6* + ''} + + # Remove some cruft. + ${lib.optionalString ((lib.versionAtLeast version "7.0") && (lib.versionOlder version "10.1")) + "rm $out/bin/uninstall*"} + + # Fixup path to samples (needed for cuda 6.5 or else nsight will not find them) + if [ -d "$out"/cuda-samples ]; then + mv "$out"/cuda-samples "$out"/samples + fi + + # Change the #error on GCC > 4.9 to a #warning. + sed -i $out/include/host_config.h -e 's/#error\(.*unsupported GNU version\)/#warning\1/' + + # Fix builds with newer glibc version + sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h" + '' + + + # Point NVCC at a compatible compiler + # CUDA_TOOLKIT_ROOT_DIR is legacy, + # Cf. 
https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
+    ''
+      mkdir -p $out/nix-support
+      cat <<EOF >> $out/nix-support/setup-hook
+      cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
+      EOF
+
+      # Move some libraries to the lib output so that programs that
+      # depend on them don't pull in this entire monstrosity.
+      mkdir -p $lib/lib
+      mv -v $out/lib64/libcudart* $lib/lib/
+
+      # Remove OpenCL libraries as they are provided by ocl-icd and driver.
+      rm -f $out/lib64/libOpenCL*
+      ${lib.optionalString (lib.versionAtLeast version "10.1" && (lib.versionOlder version "11")) ''
+        mv $out/lib64 $out/lib
+        mv $out/extras/CUPTI/lib64/libcupti* $out/lib
+      ''}
+
+      # nvprof does not find any program to profile if LD_LIBRARY_PATH is not set
+      wrapProgram $out/bin/nvprof \
+        --prefix LD_LIBRARY_PATH : $out/lib
+    ''
+    + lib.optionalString (lib.versionOlder version "8.0") ''
+      # Hack to fix building against recent Glibc/GCC.
+      echo "NIX_CFLAGS_COMPILE+=' -D_FORCE_INLINES'" >> $out/nix-support/setup-hook
+    ''
+    # 11.8 includes a broken symlink, include/include, pointing to targets/x86_64-linux/include
+    + lib.optionalString (lib.versions.majorMinor version == "11.8") ''
+      rm $out/include/include
+    ''
+    + ''
+      runHook postInstall
+    '';
+
+  postInstall = ''
+    for b in nvvp ${lib.optionalString (lib.versionOlder version "11") "nsight"}; do
+      wrapProgram "$out/bin/$b" \
+        --set GDK_PIXBUF_MODULE_FILE "$GDK_PIXBUF_MODULE_FILE"
+    done
+  '';
+
+  # cuda-gdb doesn't run correctly when not using sandboxing, so
+  # temporarily disabling the install check. This should be set to true
+  # when we figure out how to get `cuda-gdb --version` to run correctly
+  # when not using sandboxing.
+  doInstallCheck = false;
+  postInstallCheck = ''
+    # Smoke test binaries
+    pushd $out/bin
+    for f in *; do
+      case $f in
+        crt) continue;;
+        nvcc.profile) continue;;
+        nsight_ee_plugins_manage.sh) continue;;
+        uninstall_cuda_toolkit_6.5.pl) continue;;
+        computeprof|nvvp|nsight) continue;; # GUIs don't feature "--version"
+        *) echo "Executing '$f --version':"; ./$f --version;;
+      esac
+    done
+    popd
+  '';
+  passthru = {
+    inherit (backendStdenv) cc;
+    majorMinorVersion = lib.versions.majorMinor version;
+    majorVersion = lib.versions.major version;
+  };
+
+  meta = with lib; {
+    description = "A compiler for NVIDIA GPUs, math libraries, and tools";
+    homepage = "https://developer.nvidia.com/cuda-toolkit";
+    platforms = ["x86_64-linux"];
+    license = licenses.nvidiaCuda;
+    maintainers = teams.cuda.members;
+  };
+}
diff --git a/pkgs/development/cuda-modules/cudnn/fixup.nix b/pkgs/development/cuda-modules/cudnn/fixup.nix
new file mode 100644
index 0000000000000..1fb5a6ad015e4
--- /dev/null
+++ b/pkgs/development/cuda-modules/cudnn/fixup.nix
@@ -0,0 +1,69 @@
+{
+  cudaVersion,
+  fetchurl,
+  final,
+  lib,
+  package,
+  patchelf,
+  zlib,
+  ...
+}:
+let
+  inherit (lib)
+    lists
+    maintainers
+    meta
+    strings
+    ;
+in
+finalAttrs: prevAttrs: {
+  src = fetchurl {inherit (package) url hash;};
+
+  # Useful for inspecting why something went wrong.
+ brokenConditions = + let + cudaTooOld = strings.versionOlder cudaVersion package.minCudaVersion; + cudaTooNew = + (package.maxCudaVersion != null) && strings.versionOlder package.maxCudaVersion cudaVersion; + in + prevAttrs.brokenConditions + // { + "CUDA version is too old" = cudaTooOld; + "CUDA version is too new" = cudaTooNew; + }; + + buildInputs = + prevAttrs.buildInputs + ++ [zlib] + ++ lists.optionals finalAttrs.passthru.useCudatoolkitRunfile [final.cudatoolkit] + ++ lists.optionals (!finalAttrs.passthru.useCudatoolkitRunfile) [final.libcublas.lib]; + + # Tell autoPatchelf about runtime dependencies. + # NOTE: Versions from CUDNN releases have four components. + postFixup = strings.optionalString (strings.versionAtLeast finalAttrs.version "8.0.5.0") '' + ${meta.getExe' patchelf "patchelf"} $lib/lib/libcudnn.so --add-needed libcudnn_cnn_infer.so + ${meta.getExe' patchelf "patchelf"} $lib/lib/libcudnn_ops_infer.so --add-needed libcublas.so --add-needed libcublasLt.so + ''; + + passthru.useCudatoolkitRunfile = strings.versionOlder cudaVersion "11.3.999"; + + meta = prevAttrs.meta // { + homepage = "https://developer.nvidia.com/cudnn"; + maintainers = + prevAttrs.meta.maintainers + ++ ( + with maintainers; [ + mdaiter + samuela + connorbaker + ] + ); + license = { + shortName = "cuDNN EULA"; + fullName = "NVIDIA cuDNN Software License Agreement (EULA)"; + url = "https://docs.nvidia.com/deeplearning/sdk/cudnn-sla/index.html#supplement"; + free = false; + redistributable = !finalAttrs.passthru.useCudatoolkitRunfile; + }; + }; +} diff --git a/pkgs/development/cuda-modules/cudnn/shims.nix b/pkgs/development/cuda-modules/cudnn/shims.nix new file mode 100644 index 0000000000000..e9eca8ef7c8b9 --- /dev/null +++ b/pkgs/development/cuda-modules/cudnn/shims.nix @@ -0,0 +1,13 @@ +# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix +{package, redistArch}: +{ + featureRelease.${redistArch}.outputs = { + lib = true; + static = true; + dev = true; + }; + redistribRelease = { + name = "NVIDIA CUDA Deep Neural Network library (cuDNN)"; + inherit (package) version; + }; +} diff --git a/pkgs/development/cuda-modules/cutensor/extension.nix b/pkgs/development/cuda-modules/cutensor/extension.nix new file mode 100644 index 0000000000000..b762fd22ede88 --- /dev/null +++ b/pkgs/development/cuda-modules/cutensor/extension.nix @@ -0,0 +1,164 @@ +# Support matrix can be found at +# https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-880/support-matrix/index.html +# +# TODO(@connorbaker): +# This is a very similar strategy to CUDA/CUDNN: +# +# - Get all versions supported by the current release of CUDA +# - Build all of them +# - Make the newest the default +# +# Unique twists: +# +# - Instead of providing different releases for each version of CUDA, CuTensor has multiple subdirectories in `lib` +# -- one for each version of CUDA. +{ + cudaVersion, + flags, + hostPlatform, + lib, + mkVersionedPackageName, +}: +let + inherit (lib) + attrsets + lists + modules + versions + strings + trivial + ; + + redistName = "cutensor"; + pname = "libcutensor"; + + cutensorVersions = [ + "1.3.3" + "1.4.0" + "1.5.0" + "1.6.2" + "1.7.0" + ]; + + # Manifests :: { redistrib, feature } + + # Each release of cutensor gets mapped to an evaluated module for that release. + # From there, we can get the min/max CUDA versions supported by that release. 
+ # listOfManifests :: List Manifests + listOfManifests = + let + configEvaluator = + fullCutensorVersion: + modules.evalModules { + modules = [ + ../modules + # We need to nest the manifests in a config.cutensor.manifests attribute so the + # module system can evaluate them. + { + cutensor.manifests = { + redistrib = trivial.importJSON (./manifests + "/redistrib_${fullCutensorVersion}.json"); + feature = trivial.importJSON (./manifests + "/feature_${fullCutensorVersion}.json"); + }; + } + ]; + }; + # Un-nest the manifests attribute set. + releaseGrabber = evaluatedModules: evaluatedModules.config.cutensor.manifests; + in + lists.map + (trivial.flip trivial.pipe [ + configEvaluator + releaseGrabber + ]) + cutensorVersions; + + # Our cudaVersion tells us which version of CUDA we're building against. + # The subdirectories in lib/ tell us which versions of CUDA are supported. + # Typically the names will look like this: + # + # - 10.2 + # - 11 + # - 11.0 + # - 12 + + # libPath :: String + libPath = + let + cudaMajorMinor = versions.majorMinor cudaVersion; + cudaMajor = versions.major cudaVersion; + in + if cudaMajorMinor == "10.2" then cudaMajorMinor else cudaMajor; + + # A release is supported if it has a libPath that matches our CUDA version for our platform. + # LibPath are not constant across the same release -- one platform may support fewer + # CUDA versions than another. + redistArch = flags.getRedistArch hostPlatform.system; + # platformIsSupported :: Manifests -> Boolean + platformIsSupported = + {feature, ...}: + (attrsets.attrByPath + [ + pname + redistArch + ] + null + feature + ) != null; + + # TODO(@connorbaker): With an auxilliary file keeping track of the CUDA versions each release supports, + # we could filter out releases that don't support our CUDA version. + # However, we don't have that currently, so we make a best-effort to try to build TensorRT with whatever + # libPath corresponds to our CUDA version. + # supportedManifests :: List Manifests + supportedManifests = builtins.filter platformIsSupported listOfManifests; + + # Compute versioned attribute name to be used in this package set + # Patch version changes should not break the build, so we only use major and minor + # computeName :: RedistribRelease -> String + computeName = {version, ...}: mkVersionedPackageName redistName version; +in +final: _: +let + # buildCutensorPackage :: Manifests -> AttrSet Derivation + buildCutensorPackage = + {redistrib, feature}: + let + drv = final.callPackage ../generic-builders/manifest.nix { + inherit pname redistName libPath; + redistribRelease = redistrib.${pname}; + featureRelease = feature.${pname}; + }; + fixedDrv = drv.overrideAttrs ( + prevAttrs: { + buildInputs = + prevAttrs.buildInputs + ++ lists.optionals (strings.versionOlder cudaVersion "11.4") [final.cudatoolkit] + ++ lists.optionals (strings.versionAtLeast cudaVersion "11.4") ( + [final.libcublas.lib] + # For some reason, the 1.4.x release of cuTENSOR requires the cudart library. 
+ ++ lists.optionals (strings.hasPrefix "1.4" redistrib.${pname}.version) [final.cuda_cudart.lib] + ); + meta = prevAttrs.meta // { + description = "cuTENSOR: A High-Performance CUDA Library For Tensor Primitives"; + homepage = "https://developer.nvidia.com/cutensor"; + maintainers = prevAttrs.meta.maintainers ++ [lib.maintainers.obsidian-systems-maintenance]; + license = lib.licenses.unfreeRedistributable // { + shortName = "cuTENSOR EULA"; + name = "cuTENSOR SUPPLEMENT TO SOFTWARE LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS"; + url = "https://docs.nvidia.com/cuda/cutensor/license.html"; + }; + }; + } + ); + in + attrsets.nameValuePair (computeName redistrib.${pname}) fixedDrv; + + extension = + let + nameOfNewest = computeName (lists.last supportedManifests).redistrib.${pname}; + drvs = builtins.listToAttrs (lists.map buildCutensorPackage supportedManifests); + containsDefault = attrsets.optionalAttrs (drvs != {}) {cutensor = drvs.${nameOfNewest};}; + in + drvs // containsDefault; +in +extension diff --git a/pkgs/development/cuda-modules/flags.nix b/pkgs/development/cuda-modules/flags.nix index 7b7922085ff66..139952bc9dfd9 100644 --- a/pkgs/development/cuda-modules/flags.nix +++ b/pkgs/development/cuda-modules/flags.nix @@ -53,11 +53,11 @@ let isDefault = gpu: let - inherit (gpu) dontDefaultAfter; + inherit (gpu) dontDefaultAfter isJetson; newGpu = dontDefaultAfter == null; recentGpu = newGpu || strings.versionAtLeast dontDefaultAfter cudaVersion; in - recentGpu; + recentGpu && !isJetson; # supportedGpus :: List Gpu # GPUs which are supported by the provided CUDA version. @@ -100,11 +100,11 @@ let ]; # Find the intersection with the user-specified list of cudaCapabilities. - # NOTE: Jetson devices are never built by default because they cannot be targeted along + # NOTE: Jetson devices are never built by default because they cannot be targeted along with # non-Jetson devices and require an aarch64 host platform. As such, if they're present anywhere, # they must be in the user-specified cudaCapabilities. # NOTE: We don't need to worry about mixes of Jetson and non-Jetson devices here -- there's - # sanity-checking for all that in cudaFlags. + # sanity-checking for all that in below. jetsonTargets = lists.intersectLists jetsonComputeCapabilities cudaCapabilities; # dropDot :: String -> String @@ -146,14 +146,15 @@ let builtins.throw "Unsupported Nix system: ${nixSystem}"; # Maps NVIDIA redist arch to Nix system. + # It is imperative that we include the boolean condition based on jetsonTargets to ensure + # we don't advertise availability of packages only available on server-grade ARM + # as being available for the Jetson, since both `linux-sbsa` and `linux-aarch64` are + # mapped to the Nix system `aarch64-linux`. getNixSystem = redistArch: - if - lists.elem redistArch [ - "linux-aarch64" - "linux-sbsa" - ] - then + if redistArch == "linux-sbsa" && jetsonTargets == [] then + "aarch64-linux" + else if redistArch == "linux-aarch64" && jetsonTargets != [] then "aarch64-linux" else if redistArch == "linux-x86_64" then "x86_64-linux" @@ -217,26 +218,28 @@ let # isJetsonBuild :: Boolean isJetsonBuild = let - # List of booleans representing whether any of the currently targeted capabilities are - # Jetson devices. 
- # isJetsons :: List Boolean - isJetsons = - lists.map (trivial.flip builtins.getAttr cudaComputeCapabilityToIsJetson) + requestedJetsonDevices = + lists.filter (cap: cudaComputeCapabilityToIsJetson.${cap}) + cudaCapabilities; + requestedNonJetsonDevices = + lists.filter (cap: !(builtins.elem cap requestedJetsonDevices)) cudaCapabilities; - anyJetsons = lists.any (trivial.id) isJetsons; - allJetsons = lists.all (trivial.id) isJetsons; - hostIsAarch64 = hostPlatform.isAarch64; + jetsonBuildSufficientCondition = requestedJetsonDevices != []; + jetsonBuildNecessaryCondition = requestedNonJetsonDevices == [] && hostPlatform.isAarch64; in - trivial.throwIfNot (anyJetsons -> (allJetsons && hostIsAarch64)) + trivial.throwIf (jetsonBuildSufficientCondition && !jetsonBuildNecessaryCondition) '' Jetson devices cannot be targeted with non-Jetson devices. Additionally, they require hostPlatform to be aarch64. You requested ${builtins.toJSON cudaCapabilities} for host platform ${hostPlatform.system}. + Requested Jetson devices: ${builtins.toJSON requestedJetsonDevices}. + Requested non-Jetson devices: ${builtins.toJSON requestedNonJetsonDevices}. Exactly one of the following must be true: - - All CUDA capabilities belong to Jetson devices (${trivial.boolToString allJetsons}) and the hostPlatform is aarch64 (${trivial.boolToString hostIsAarch64}). - - No CUDA capabilities belong to Jetson devices (${trivial.boolToString (!anyJetsons)}). + - All CUDA capabilities belong to Jetson devices and hostPlatform is aarch64. + - No CUDA capabilities belong to Jetson devices. See ${./gpus.nix} for a list of architectures supported by this version of Nixpkgs. '' - allJetsons; + jetsonBuildSufficientCondition + && jetsonBuildNecessaryCondition; }; in # When changing names or formats: pause, validate, and update the assert @@ -283,7 +286,7 @@ assert let }; actualWrapped = (builtins.tryEval (builtins.deepSeq actual actual)).value; in -asserts.assertMsg (expected == actualWrapped) '' +asserts.assertMsg ((strings.versionAtLeast cudaVersion "11.2") -> (expected == actualWrapped)) '' This test should only fail when using a version of CUDA older than 11.2, the first to support 8.6. Expected: ${builtins.toJSON expected} diff --git a/pkgs/development/cuda-modules/generic-builders/manifest.nix b/pkgs/development/cuda-modules/generic-builders/manifest.nix new file mode 100644 index 0000000000000..01398d1165116 --- /dev/null +++ b/pkgs/development/cuda-modules/generic-builders/manifest.nix @@ -0,0 +1,249 @@ +{ + # General callPackage-supplied arguments + autoAddOpenGLRunpathHook, + autoPatchelfHook, + backendStdenv, + fetchurl, + lib, + lndir, + markForCudatoolkitRootHook, + flags, + stdenv, + hostPlatform, + # Builder-specific arguments + # Short package name (e.g., "cuda_cccl") + # pname : String + pname, + # Common name (e.g., "cutensor" or "cudnn") -- used in the URL. + # Also known as the Redistributable Name. + # redistName : String, + redistName, + # If libPath is non-null, it must be a subdirectory of `lib`. + # The contents of `libPath` will be moved to the root of `lib`. + libPath ? null, + # See ./modules/generic/manifests/redistrib/release.nix + redistribRelease, + # See ./modules/generic/manifests/feature/release.nix + featureRelease, +}: +let + inherit (lib) + attrsets + lists + meta + strings + trivial + licenses + teams + sourceTypes + ; + + # Get the redist architectures for which package provides distributables. + # These are used by meta.platforms. 
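+  # For example (illustrative only, not taken from a real manifest): the feature release
+  # for a package typically has attribute names such as "linux-x86_64", "linux-sbsa",
+  # and "linux-aarch64", so supportedRedistArchs is a list of those strings.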
+ supportedRedistArchs = builtins.attrNames featureRelease; + redistArch = flags.getRedistArch hostPlatform.system; +in +backendStdenv.mkDerivation ( + finalAttrs: { + # NOTE: Even though there's no actual buildPhase going on here, the derivations of the + # redistributables are sensitive to the compiler flags provided to stdenv. The patchelf package + # is sensitive to the compiler flags provided to stdenv, and we depend on it. As such, we are + # also sensitive to the compiler flags provided to stdenv. + inherit pname; + inherit (redistribRelease) version; + + # Don't force serialization to string for structured attributes, like outputToPatterns + # and brokenConditions. + # Avoids "set cannot be coerced to string" errors. + __structuredAttrs = true; + + # Keep better track of dependencies. + strictDeps = true; + + # TODO(@connorbaker): Update `cuda-redist-find-features` to produce an attrset of boolean values for the + # outputs instead of `has*` attributes. + # NOTE: Outputs are evaluated jointly with meta, so in the case that this is an unsupported platform, + # we still need to provide a list of outputs. + outputs = + let + # Checks whether the redistributable provides an output. + hasOutput = + output: + attrsets.attrByPath + [ + redistArch + "outputs" + output + ] + false + featureRelease; + # Order is important here so we use a list. + additionalOutputs = builtins.filter hasOutput [ + "bin" + "lib" + "static" + "dev" + "doc" + "sample" + "python" + ]; + # The out output is special -- it's the default output and we always include it. + outputs = ["out"] ++ additionalOutputs; + in + outputs; + + # Traversed in the order of the outputs speficied in outputs; + # entries are skipped if they don't exist in outputs. + outputToPatterns = { + bin = ["bin"]; + lib = [ + "lib" + "lib64" + ]; + static = ["**/*.a"]; + sample = ["samples"]; + python = ["**/*.whl"]; + }; + + # Useful for introspecting why something went wrong. + # Maps descriptions of why the derivation would be marked broken to + # booleans indicating whether that description is true. + brokenConditions = {}; + + src = fetchurl { + url = "https://developer.download.nvidia.com/compute/${redistName}/redist/${ + redistribRelease.${redistArch}.relative_path + }"; + inherit (redistribRelease.${redistArch}) sha256; + }; + + # We do need some other phases, like configurePhase, so the multiple-output setup hook works. + dontBuild = true; + + nativeBuildInputs = [ + autoPatchelfHook + # This hook will make sure libcuda can be found + # in typically /lib/opengl-driver by adding that + # directory to the rpath of all ELF binaries. + # Check e.g. with `patchelf --print-rpath path/to/my/binary + autoAddOpenGLRunpathHook + markForCudatoolkitRootHook + ]; + + buildInputs = + [ + # autoPatchelfHook will search for a libstdc++ and we're giving it + # one that is compatible with the rest of nixpkgs, even when + # nvcc forces us to use an older gcc + # NB: We don't actually know if this is the right thing to do + stdenv.cc.cc.lib + ]; + + # Picked up by autoPatchelf + # Needed e.g. for libnvrtc to locate (dlopen) libnvrtc-builtins + appendRunpaths = ["$ORIGIN"]; + + # NOTE: We don't need to check for dev or doc, because those outputs are handled by + # the multiple-outputs setup hook. 
+ # NOTE: moveToOutput operates on all outputs: + # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L105-L107 + installPhase = + let + mkMoveToOutputCommand = + output: + let + template = pattern: ''moveToOutput "${pattern}" "${"$" + output}"''; + patterns = finalAttrs.outputToPatterns.${output} or []; + in + strings.concatMapStringsSep "\n" template patterns; + in + # Pre-install hook + '' + runHook preInstall + '' + # Handle the existence of libPath, which requires us to re-arrange the lib directory + + strings.optionalString (libPath != null) '' + if [[ ! -d "${libPath}" ]] ; then + echo "${finalAttrs.pname}: ${libPath} does not exist, only found:" >&2 + find "$(dirname ${libPath})"/ -maxdepth 1 >&2 + echo "This release might not support your CUDA version" >&2 + exit 1 + fi + mv "lib/${libPath}" lib_new + rm -r lib + mv lib_new lib + '' + + '' + mkdir -p "$out" + mv * "$out" + ${strings.concatMapStringsSep "\n" mkMoveToOutputCommand (builtins.tail finalAttrs.outputs)} + runHook postInstall + ''; + + # libcuda needs to be resolved during runtime + # NOTE: Due to the use of __structuredAttrs, we can't use a list for autoPatchelfIgnoreMissingDeps, since it + # will take only the first value. Instead, we produce a string with the values separated by spaces. + # Using the `env` attribute ensures that the value is representable as one of the primitives allowed by + # bash's environment variables. + env.autoPatchelfIgnoreMissingDeps = "libcuda.so libcuda.so.*"; + + # The out output leverages the same functionality which backs the `symlinkJoin` function in + # Nixpkgs: + # https://github.com/NixOS/nixpkgs/blob/d8b2a92df48f9b08d68b0132ce7adfbdbc1fbfac/pkgs/build-support/trivial-builders/default.nix#L510 + # + # That should allow us to emulate "fat" default outputs without having to actually create them. + # + # It is important that this run after the autoPatchelfHook, otherwise the symlinks in out will reference libraries in lib, creating a circular dependency. + postPhases = ["postPatchelf"]; + + # For each output, create a symlink to it in the out output. + # NOTE: We must recreate the out output here, because the setup hook will have deleted it + # if it was empty. + postPatchelf = + let + # Note the double dollar sign -- we want to interpolate the variable in bash, not the string. + mkJoinWithOutOutputCommand = output: ''${meta.getExe lndir} "${"$" + output}" "$out"''; + in + '' + mkdir -p "$out" + ${strings.concatMapStringsSep "\n" mkJoinWithOutOutputCommand (builtins.tail finalAttrs.outputs)} + ''; + + # Make the CUDA-patched stdenv available + passthru.stdenv = backendStdenv; + + # Setting propagatedBuildInputs to false will prevent outputs known to the multiple-outputs + # from depending on `out` by default. + # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L196 + # Indeed, we want to do the opposite -- fat "out" outputs that contain all the other outputs. + propagatedBuildOutputs = false; + + # By default, if the dev output exists it just uses that. + # However, because we disabled propagatedBuildOutputs, dev doesn't contain libraries or + # anything of the sort. To remedy this, we set outputSpecified to true, and use + # outputsToInstall, which tells Nix which outputs to use when the package name is used + # unqualified (that is, without an explicit output). 
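+  # For example (illustrative only): installing a redistributable such as
+  # `cudaPackages.libcublas` without naming an output then pulls in the fat `out`
+  # output, while `cudaPackages.libcublas.lib` can still be used to depend on just
+  # the shared libraries.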
+ outputSpecified = true; + + meta = { + description = "${redistribRelease.name}. By downloading and using the packages you accept the terms and conditions of the ${finalAttrs.meta.license.shortName}"; + sourceProvenance = [sourceTypes.binaryNativeCode]; + platforms = + lists.concatMap + ( + redistArch: + let + nixSystem = builtins.tryEval (flags.getNixSystem redistArch); + in + if nixSystem.success then [nixSystem.value] else [] + ) + supportedRedistArchs; + broken = lists.any trivial.id (attrsets.attrValues finalAttrs.brokenConditions); + license = licenses.unfree; + maintainers = teams.cuda.members; + # Force the use of the default, fat output by default (even though `dev` exists, which + # causes Nix to prefer that output over the others if outputSpecified isn't set). + outputsToInstall = ["out"]; + }; + } +) diff --git a/pkgs/development/cuda-modules/generic-builders/multiplex.nix b/pkgs/development/cuda-modules/generic-builders/multiplex.nix new file mode 100644 index 0000000000000..b8053094bcc82 --- /dev/null +++ b/pkgs/development/cuda-modules/generic-builders/multiplex.nix @@ -0,0 +1,131 @@ +{ + # callPackage-provided arguments + lib, + cudaVersion, + flags, + hostPlatform, + # Expected to be passed by the caller + mkVersionedPackageName, + # pname :: String + pname, + # releasesModule :: Path + # A path to a module which provides a `releases` attribute + releasesModule, + # shims :: Path + # A path to a module which provides a `shims` attribute + # The redistribRelease is only used in ./manifest.nix for the package version + # and the package description (which NVIDIA's manifest calls the "name"). + # It's also used for fetching the source, but we override that since we can't + # re-use that portion of the functionality (different URLs, etc.). + # The featureRelease is used to populate meta.platforms (by way of looking at the attribute names) + # and to determine the outputs of the package. + # shimFn :: {package, redistArch} -> AttrSet + shimsFn ? ({package, redistArch}: throw "shimsFn must be provided"), + # fixupFn :: Path + # A path (or nix expression) to be evaluated with callPackage and then + # provided to the package's overrideAttrs function. + # It must accept at least the following arguments: + # - final + # - cudaVersion + # - mkVersionedPackageName + # - package + fixupFn ? ( + { + final, + cudaVersion, + mkVersionedPackageName, + package, + ... + }: + throw "fixupFn must be provided" + ), +}: +let + inherit (lib) + attrsets + lists + modules + strings + ; + + evaluatedModules = modules.evalModules { + modules = [ + ../modules + releasesModule + ]; + }; + + # NOTE: Important types: + # - Releases: ../modules/${pname}/releases/releases.nix + # - Package: ../modules/${pname}/releases/package.nix + + # All releases across all platforms + # See ../modules/${pname}/releases/releases.nix + allReleases = evaluatedModules.config.${pname}.releases; + + # Compute versioned attribute name to be used in this package set + # Patch version changes should not break the build, so we only use major and minor + # computeName :: Package -> String + computeName = {version, ...}: mkVersionedPackageName pname version; + + # Check whether a package supports our CUDA version + # isSupported :: Package -> Bool + isSupported = + package: + strings.versionAtLeast cudaVersion package.minCudaVersion + && strings.versionAtLeast package.maxCudaVersion cudaVersion; + + # Get all of the packages for our given platform. 
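+  # For example (illustrative only): flags.getRedistArch maps "x86_64-linux" to
+  # "linux-x86_64", and maps "aarch64-linux" to "linux-aarch64" when targeting Jetson
+  # devices or to "linux-sbsa" (server-grade ARM) otherwise.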
+ redistArch = flags.getRedistArch hostPlatform.system; + + # All the supported packages we can build for our platform. + # supportedPackages :: List (AttrSet Packages) + supportedPackages = builtins.filter isSupported (allReleases.${redistArch} or []); + + # newestToOldestSupportedPackage :: List (AttrSet Packages) + newestToOldestSupportedPackage = lists.reverseList supportedPackages; + + nameOfNewest = computeName (builtins.head newestToOldestSupportedPackage); + + # A function which takes the `final` overlay and the `package` being built and returns + # a function to be consumed via `overrideAttrs`. + overrideAttrsFixupFn = + final: package: + final.callPackage fixupFn { + inherit + final + cudaVersion + mkVersionedPackageName + package + ; + }; + + extension = + final: _: + let + # Builds our package into derivation and wraps it in a nameValuePair, where the name is the versioned name + # of the package. + buildPackage = + package: + let + shims = final.callPackage shimsFn {inherit package redistArch;}; + name = computeName package; + drv = final.callPackage ./manifest.nix { + inherit pname; + redistName = pname; + inherit (shims) redistribRelease featureRelease; + }; + fixedDrv = drv.overrideAttrs (overrideAttrsFixupFn final package); + in + attrsets.nameValuePair name fixedDrv; + + # versionedDerivations :: AttrSet Derivation + versionedDerivations = builtins.listToAttrs (lists.map buildPackage newestToOldestSupportedPackage); + + defaultDerivation = attrsets.optionalAttrs (versionedDerivations != {}) { + ${pname} = versionedDerivations.${nameOfNewest}; + }; + in + versionedDerivations // defaultDerivation; +in +extension diff --git a/pkgs/development/cuda-modules/gpus.nix b/pkgs/development/cuda-modules/gpus.nix index d170739361383..cf6e0a1eaf360 100644 --- a/pkgs/development/cuda-modules/gpus.nix +++ b/pkgs/development/cuda-modules/gpus.nix @@ -86,7 +86,7 @@ computeCapability = "5.3"; isJetson = true; minCudaVersion = "10.0"; - dontDefaultAfter = "11.0"; + dontDefaultAfter = null; maxCudaVersion = null; } { diff --git a/pkgs/development/cuda-modules/modules/cuda/default.nix b/pkgs/development/cuda-modules/modules/cuda/default.nix new file mode 100644 index 0000000000000..4ea35d0482265 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/cuda/default.nix @@ -0,0 +1 @@ +{options, ...}: {options.cuda.manifests = options.generic.manifests;} diff --git a/pkgs/development/cuda-modules/modules/cudnn/default.nix b/pkgs/development/cuda-modules/modules/cudnn/default.nix new file mode 100644 index 0000000000000..dd52cbaa24b4d --- /dev/null +++ b/pkgs/development/cuda-modules/modules/cudnn/default.nix @@ -0,0 +1,12 @@ +{options, ...}: +{ + options.cudnn.releases = options.generic.releases; + # TODO(@connorbaker): Figure out how to add additional options to the + # to the generic release. 
+ # { + # url = options.mkOption { + # description = "The URL to download the tarball from"; + # type = types.str; + # }; + # } +} diff --git a/pkgs/development/cuda-modules/modules/cutensor/default.nix b/pkgs/development/cuda-modules/modules/cutensor/default.nix new file mode 100644 index 0000000000000..8ec2189fee4cc --- /dev/null +++ b/pkgs/development/cuda-modules/modules/cutensor/default.nix @@ -0,0 +1 @@ +{options, ...}: {options.cutensor.manifests = options.generic.manifests;} diff --git a/pkgs/development/cuda-modules/modules/default.nix b/pkgs/development/cuda-modules/modules/default.nix new file mode 100644 index 0000000000000..ccccd871479e1 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/default.nix @@ -0,0 +1,10 @@ +{ + imports = [ + ./generic + # Always after generic + ./cuda + ./cudnn + ./cutensor + ./tensorrt + ]; +} diff --git a/pkgs/development/cuda-modules/modules/generic/default.nix b/pkgs/development/cuda-modules/modules/generic/default.nix new file mode 100644 index 0000000000000..b68aa614f2400 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/default.nix @@ -0,0 +1,7 @@ +{ + imports = [ + ./types + ./manifests + ./releases + ]; +} diff --git a/pkgs/development/cuda-modules/modules/generic/manifests/default.nix b/pkgs/development/cuda-modules/modules/generic/manifests/default.nix new file mode 100644 index 0000000000000..6c12919ff4000 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/manifests/default.nix @@ -0,0 +1,7 @@ +{lib, config, ...}: +{ + options.generic.manifests = { + feature = import ./feature/manifest.nix {inherit lib config;}; + redistrib = import ./redistrib/manifest.nix {inherit lib;}; + }; +} diff --git a/pkgs/development/cuda-modules/modules/generic/manifests/feature/manifest.nix b/pkgs/development/cuda-modules/modules/generic/manifests/feature/manifest.nix new file mode 100644 index 0000000000000..29ca678e0e5a5 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/manifests/feature/manifest.nix @@ -0,0 +1,10 @@ +{lib, config, ...}: +let + inherit (lib) options trivial types; + Release = import ./release.nix {inherit lib config;}; +in +options.mkOption { + description = "A feature manifest is an attribute set which includes a mapping from package name to release"; + example = trivial.importJSON ../../../../cuda/manifests/feature_11.5.2.json; + type = types.attrsOf Release.type; +} diff --git a/pkgs/development/cuda-modules/modules/generic/manifests/feature/outputs.nix b/pkgs/development/cuda-modules/modules/generic/manifests/feature/outputs.nix new file mode 100644 index 0000000000000..db6dff769e145 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/manifests/feature/outputs.nix @@ -0,0 +1,60 @@ +{lib, ...}: +let + inherit (lib) options types; +in +# https://github.com/ConnorBaker/cuda-redist-find-features/blob/603407bea2fab47f2dfcd88431122a505af95b42/cuda_redist_find_features/manifest/feature/package/package.py +options.mkOption { + description = "A set of outputs that a package can provide."; + example = { + bin = true; + dev = true; + doc = false; + lib = false; + sample = false; + static = false; + }; + type = types.submodule { + options = { + bin = options.mkOption { + description = "A `bin` output requires that we have a non-empty `bin` directory containing at least one file with the executable bit set."; + type = types.bool; + }; + dev = options.mkOption { + description = '' + A `dev` output requires that we have at least one of the following non-empty directories: + + - 
`include` + - `lib/pkgconfig` + - `share/pkgconfig` + - `lib/cmake` + - `share/aclocal` + ''; + type = types.bool; + }; + doc = options.mkOption { + description = '' + A `doc` output requires that we have at least one of the following non-empty directories: + + - `share/info` + - `share/doc` + - `share/gtk-doc` + - `share/devhelp` + - `share/man` + ''; + type = types.bool; + }; + lib = options.mkOption { + description = "A `lib` output requires that we have a non-empty lib directory containing at least one shared library."; + type = types.bool; + }; + sample = options.mkOption { + description = "A `sample` output requires that we have a non-empty `samples` directory."; + type = types.bool; + }; + static = options.mkOption { + description = "A `static` output requires that we have a non-empty lib directory containing at least one static library."; + type = types.bool; + }; + }; + }; +} diff --git a/pkgs/development/cuda-modules/modules/generic/manifests/feature/package.nix b/pkgs/development/cuda-modules/modules/generic/manifests/feature/package.nix new file mode 100644 index 0000000000000..2c36a3e0cb270 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/manifests/feature/package.nix @@ -0,0 +1,10 @@ +{lib, ...}: +let + inherit (lib) options types; + Outputs = import ./outputs.nix {inherit lib;}; +in +options.mkOption { + description = "A package in the manifest"; + example = (import ./release.nix {inherit lib;}).linux-x86_64; + type = types.submodule {options.outputs = Outputs;}; +} diff --git a/pkgs/development/cuda-modules/modules/generic/manifests/feature/release.nix b/pkgs/development/cuda-modules/modules/generic/manifests/feature/release.nix new file mode 100644 index 0000000000000..be3a30ffdc59d --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/manifests/feature/release.nix @@ -0,0 +1,10 @@ +{lib, config, ...}: +let + inherit (lib) options types; + Package = import ./package.nix {inherit lib config;}; +in +options.mkOption { + description = "A release is an attribute set which includes a mapping from platform to package"; + example = (import ./manifest.nix {inherit lib;}).cuda_cccl; + type = types.attrsOf Package.type; +} diff --git a/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/manifest.nix b/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/manifest.nix new file mode 100644 index 0000000000000..0cfa40241fdc0 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/manifest.nix @@ -0,0 +1,33 @@ +{lib, ...}: +let + inherit (lib) options trivial types; + Release = import ./release.nix {inherit lib;}; +in +options.mkOption { + description = "A redistributable manifest is an attribute set which includes a mapping from package name to release"; + example = trivial.importJSON ../../../../cuda/manifests/redistrib_11.5.2.json; + type = types.submodule { + # Allow any attribute name as these will be the package names + freeformType = types.attrsOf Release.type; + options = { + release_date = options.mkOption { + description = "The release date of the manifest"; + type = types.nullOr types.str; + default = null; + example = "2023-08-29"; + }; + release_label = options.mkOption { + description = "The release label of the manifest"; + type = types.nullOr types.str; + default = null; + example = "12.2.2"; + }; + release_product = options.mkOption { + example = "cuda"; + description = "The release product of the manifest"; + type = types.nullOr types.str; + default = null; + }; + }; + }; +} diff --git 
a/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/package.nix b/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/package.nix new file mode 100644 index 0000000000000..8d18c06b893f4 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/package.nix @@ -0,0 +1,32 @@ +{lib, ...}: +let + inherit (lib) options types; +in +options.mkOption { + description = "A package in the manifest"; + example = (import ./release.nix {inherit lib;}).linux-x86_64; + type = types.submodule { + options = { + relative_path = options.mkOption { + description = "The relative path to the package"; + example = "cuda_cccl/linux-x86_64/cuda_cccl-linux-x86_64-11.5.62-archive.tar.xz"; + type = types.str; + }; + sha256 = options.mkOption { + description = "The sha256 hash of the package"; + example = "bbe633d6603d5a96a214dcb9f3f6f6fd2fa04d62e53694af97ae0c7afe0121b0"; + type = types.str; + }; + md5 = options.mkOption { + description = "The md5 hash of the package"; + example = "e5deef4f6cb71f14aac5be5d5745dafe"; + type = types.str; + }; + size = options.mkOption { + description = "The size of the package as a string"; + type = types.str; + example = "960968"; + }; + }; + }; +} diff --git a/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/release.nix b/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/release.nix new file mode 100644 index 0000000000000..dd2b206fede41 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/manifests/redistrib/release.nix @@ -0,0 +1,36 @@ +{lib, ...}: +let + inherit (lib) options types; + Package = import ./package.nix {inherit lib;}; +in +options.mkOption { + description = "A release is an attribute set which includes a mapping from platform to package"; + example = (import ./manifest.nix {inherit lib;}).cuda_cccl; + type = types.submodule { + # Allow any attribute name as these will be the platform names + freeformType = types.attrsOf Package.type; + options = { + name = options.mkOption { + description = "The full name of the package"; + example = "CXX Core Compute Libraries"; + type = types.str; + }; + license = options.mkOption { + description = "The license of the package"; + example = "CUDA Toolkit"; + type = types.str; + }; + license_path = options.mkOption { + description = "The path to the license of the package"; + example = "cuda_cccl/LICENSE.txt"; + default = null; + type = types.nullOr types.str; + }; + version = options.mkOption { + description = "The version of the package"; + example = "11.5.62"; + type = types.str; + }; + }; + }; +} diff --git a/pkgs/development/cuda-modules/modules/generic/releases/default.nix b/pkgs/development/cuda-modules/modules/generic/releases/default.nix new file mode 100644 index 0000000000000..8da6f0d5cc79c --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/releases/default.nix @@ -0,0 +1,45 @@ +{lib, config, ...}: +let + inherit (config.generic.types) majorMinorVersion majorMinorPatchBuildVersion; + inherit (lib) options types; +in +{ + options.generic.releases = options.mkOption { + description = "A collection of packages targeting different platforms"; + type = + let + Package = options.mkOption { + description = "A package for a specific platform"; + example = { + version = "8.0.3.4"; + minCudaVersion = "10.2"; + maxCudaVersion = "10.2"; + hash = "sha256-LxcXgwe1OCRfwDsEsNLIkeNsOcx3KuF5Sj+g2dY6WD0="; + }; + type = types.submodule { + # TODO(@connorbaker): Figure out how to extend option sets. 
+ freeformType = types.attrsOf types.anything; + options = { + version = options.mkOption { + description = "The version of the package"; + type = majorMinorPatchBuildVersion; + }; + minCudaVersion = options.mkOption { + description = "The minimum CUDA version supported"; + type = majorMinorVersion; + }; + maxCudaVersion = options.mkOption { + description = "The maximum CUDA version supported"; + type = majorMinorVersion; + }; + hash = options.mkOption { + description = "The hash of the tarball"; + type = types.str; + }; + }; + }; + }; + in + types.attrsOf (types.listOf Package.type); + }; +} diff --git a/pkgs/development/cuda-modules/modules/generic/types/default.nix b/pkgs/development/cuda-modules/modules/generic/types/default.nix new file mode 100644 index 0000000000000..61d13b3cc8d2b --- /dev/null +++ b/pkgs/development/cuda-modules/modules/generic/types/default.nix @@ -0,0 +1,39 @@ +{lib, ...}: +let + inherit (lib) options types; +in +{ + options.generic.types = options.mkOption { + type = types.attrsOf types.optionType; + default = {}; + description = "A set of generic types."; + }; + config.generic.types = { + cudaArch = types.strMatching "^sm_[[:digit:]]+[a-z]?$" // { + name = "cudaArch"; + description = "A CUDA architecture name."; + }; + # https://github.com/ConnorBaker/cuda-redist-find-features/blob/c841980e146f8664bbcd0ba1399e486b7910617b/cuda_redist_find_features/types/_lib_so_name.py + libSoName = types.strMatching ".*\\.so(\\.[[:digit:]]+)*$" // { + name = "libSoName"; + description = "The name of a shared object file."; + }; + + majorMinorVersion = types.strMatching "^([[:digit:]]+)\\.([[:digit:]]+)$" // { + name = "majorMinorVersion"; + description = "A version number with a major and minor component."; + }; + + majorMinorPatchVersion = types.strMatching "^([[:digit:]]+)\\.([[:digit:]]+)\\.([[:digit:]]+)$" // { + name = "majorMinorPatchVersion"; + description = "A version number with a major, minor, and patch component."; + }; + + majorMinorPatchBuildVersion = + types.strMatching "^([[:digit:]]+)\\.([[:digit:]]+)\\.([[:digit:]]+)\\.([[:digit:]]+)$" + // { + name = "majorMinorPatchBuildVersion"; + description = "A version number with a major, minor, patch, and build component."; + }; + }; +} diff --git a/pkgs/development/cuda-modules/modules/tensorrt/default.nix b/pkgs/development/cuda-modules/modules/tensorrt/default.nix new file mode 100644 index 0000000000000..e62942c679aa0 --- /dev/null +++ b/pkgs/development/cuda-modules/modules/tensorrt/default.nix @@ -0,0 +1,16 @@ +{options, ...}: +{ + options.tensorrt.releases = options.generic.releases; + # TODO(@connorbaker): Figure out how to add additional options to the + # to the generic release. + # { + # cudnnVersion = lib.options.mkOption { + # description = "The CUDNN version supported"; + # type = types.nullOr majorMinorVersion; + # }; + # filename = lib.options.mkOption { + # description = "The tarball name"; + # type = types.str; + # }; + # } +} diff --git a/pkgs/development/cuda-modules/nccl-tests/default.nix b/pkgs/development/cuda-modules/nccl-tests/default.nix new file mode 100644 index 0000000000000..5c2f29b7ed564 --- /dev/null +++ b/pkgs/development/cuda-modules/nccl-tests/default.nix @@ -0,0 +1,84 @@ +# NOTE: Though NCCL tests is called within the cudaPackages package set, we avoid passing in +# the names of dependencies from that package set directly to avoid evaluation errors +# in the case redistributable packages are not available. 
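+# Illustrative usage (an assumption, not part of this change): with `allowUnfree` and
+# `cudaSupport` enabled in the nixpkgs config, something like
+#   nix-build -A cudaPackages.nccl-tests
+# should produce benchmark binaries such as `all_reduce_perf` under `result/bin`.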
+{ + config, + cudaPackages, + fetchFromGitHub, + gitUpdater, + lib, + mpi, + mpiSupport ? false, + which, +}: +let + inherit (cudaPackages) + backendStdenv + cuda_cccl + cuda_cudart + cuda_nvcc + cudatoolkit + cudaVersion + nccl + ; +in +backendStdenv.mkDerivation ( + finalAttrs: { + + pname = "nccl-tests"; + version = "2.13.8"; + + src = fetchFromGitHub { + owner = "NVIDIA"; + repo = finalAttrs.pname; + rev = "v${finalAttrs.version}"; + hash = "sha256-dxLoflsTHDBnZRTzoXdm30OyKpLlRa73b784YWALBHg="; + }; + + strictDeps = true; + + nativeBuildInputs = + [which] + ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] + ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc]; + + buildInputs = + [nccl] + ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] + ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ + cuda_nvcc.dev # crt/host_config.h + cuda_cudart + ] + ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [ + cuda_cccl.dev # + ] + ++ lib.optionals mpiSupport [mpi]; + + makeFlags = + ["NCCL_HOME=${nccl}"] + ++ lib.optionals (lib.versionOlder cudaVersion "11.4") ["CUDA_HOME=${cudatoolkit}"] + ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") ["CUDA_HOME=${cuda_nvcc}"] + ++ lib.optionals mpiSupport ["MPI=1"]; + + enableParallelBuilding = true; + + installPhase = '' + mkdir -p $out/bin + cp -r build/* $out/bin/ + ''; + + passthru.updateScript = gitUpdater { + inherit (finalAttrs) pname version; + rev-prefix = "v"; + }; + + meta = with lib; { + description = "Tests to check both the performance and the correctness of NVIDIA NCCL operations"; + homepage = "https://github.com/NVIDIA/nccl-tests"; + platforms = platforms.linux; + license = licenses.bsd3; + broken = !config.cudaSupport || (mpiSupport && mpi == null); + maintainers = with maintainers; [jmillerpdt] ++ teams.cuda.members; + }; + } +) diff --git a/pkgs/development/cuda-modules/nccl/default.nix b/pkgs/development/cuda-modules/nccl/default.nix new file mode 100644 index 0000000000000..c56d59cb42068 --- /dev/null +++ b/pkgs/development/cuda-modules/nccl/default.nix @@ -0,0 +1,112 @@ +# NOTE: Though NCCL is called within the cudaPackages package set, we avoid passing in +# the names of dependencies from that package set directly to avoid evaluation errors +# in the case redistributable packages are not available. +{ + lib, + fetchFromGitHub, + python3, + which, + cudaPackages, + # passthru.updateScript + gitUpdater, +}: +let + inherit (cudaPackages) + autoAddOpenGLRunpathHook + backendStdenv + cuda_cccl + cuda_cudart + cuda_nvcc + cudaFlags + cudatoolkit + cudaVersion + ; +in +backendStdenv.mkDerivation ( + finalAttrs: { + pname = "nccl"; + version = "2.19.3-1"; + + src = fetchFromGitHub { + owner = "NVIDIA"; + repo = finalAttrs.pname; + rev = "v${finalAttrs.version}"; + hash = "sha256-59FlOKM5EB5Vkm4dZBRCkn+IgIcdQehE+FyZAdTCT/A="; + }; + + strictDeps = true; + + outputs = [ + "out" + "dev" + ]; + + nativeBuildInputs = + [ + which + autoAddOpenGLRunpathHook + python3 + ] + ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] + ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc]; + + buildInputs = + lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] + ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ + cuda_nvcc.dev # crt/host_config.h + cuda_cudart + ] + # NOTE: CUDA versions in Nixpkgs only use a major and minor version. 
When we do comparisons + # against other version, like below, it's important that we use the same format. Otherwise, + # we'll get incorrect results. + # For example, lib.versionAtLeast "12.0" "12.0.0" == false. + ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [cuda_cccl]; + + env.NIX_CFLAGS_COMPILE = toString ["-Wno-unused-function"]; + + preConfigure = '' + patchShebangs ./src/device/generate.py + makeFlagsArray+=( + "NVCC_GENCODE=${lib.concatStringsSep " " cudaFlags.gencode}" + ) + ''; + + makeFlags = + ["PREFIX=$(out)"] + ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [ + "CUDA_HOME=${cudatoolkit}" + "CUDA_LIB=${lib.getLib cudatoolkit}/lib" + "CUDA_INC=${lib.getDev cudatoolkit}/include" + ] + ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ + "CUDA_HOME=${cuda_nvcc}" + "CUDA_LIB=${lib.getLib cuda_cudart}/lib" + "CUDA_INC=${lib.getDev cuda_cudart}/include" + ]; + + enableParallelBuilding = true; + + postFixup = '' + moveToOutput lib/libnccl_static.a $dev + ''; + + passthru.updateScript = gitUpdater { + inherit (finalAttrs) pname version; + rev-prefix = "v"; + }; + + meta = with lib; { + description = "Multi-GPU and multi-node collective communication primitives for NVIDIA GPUs"; + homepage = "https://developer.nvidia.com/nccl"; + license = licenses.bsd3; + platforms = platforms.linux; + maintainers = + with maintainers; + [ + mdaiter + orivej + ] + ++ teams.cuda.members; + }; + } +) diff --git a/pkgs/development/cuda-modules/nvcc-compatibilities.nix b/pkgs/development/cuda-modules/nvcc-compatibilities.nix index 50f05f8af8ebf..3981869e4b315 100644 --- a/pkgs/development/cuda-modules/nvcc-compatibilities.nix +++ b/pkgs/development/cuda-modules/nvcc-compatibilities.nix @@ -70,16 +70,22 @@ let # Added support for Clang 12 and GCC 11 # https://docs.nvidia.com/cuda/archive/11.4.4/cuda-toolkit-release-notes/index.html#cuda-general-new-features - "11.4" = { + "11.4" = attrs."11.3" // { clangMaxMajorVersion = "12"; - gccMaxMajorVersion = "11"; + # NOTE: There is a bug in the version of GLIBC that GCC 11 uses which causes it to fail to compile some CUDA + # code. As such, we skip it for this release, and do the bump in 11.6 (skipping 11.5). + # https://forums.developer.nvidia.com/t/cuda-11-5-samples-throw-multiple-error-attribute-malloc-does-not-take-arguments/192750/15 + # gccMaxMajorVersion = "11"; }; # No changes from 11.4 to 11.5 "11.5" = attrs."11.4"; # No changes from 11.5 to 11.6 - "11.6" = attrs."11.5"; + # However, as mentioned above, we add GCC 11 this release. 
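+  # Net effect (illustrative): after the merges above, attrs."11.6" evaluates to
+  #   { clangMaxMajorVersion = "12"; gccMaxMajorVersion = "11"; }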
+ "11.6" = attrs."11.5" // { + gccMaxMajorVersion = "11"; + }; # Added support for Clang 13 # https://docs.nvidia.com/cuda/archive/11.7.1/cuda-toolkit-release-notes/index.html#cuda-compiler-new-features diff --git a/pkgs/development/cuda-modules/saxpy/CMakeLists.txt b/pkgs/development/cuda-modules/saxpy/CMakeLists.txt new file mode 100644 index 0000000000000..a6954e6e8bee2 --- /dev/null +++ b/pkgs/development/cuda-modules/saxpy/CMakeLists.txt @@ -0,0 +1,12 @@ +cmake_minimum_required(VERSION 3.25) +project(saxpy LANGUAGES CXX CUDA) + +find_package(CUDAToolkit REQUIRED COMPONENTS cudart cublas) + +add_executable(saxpy saxpy.cu) +target_link_libraries(saxpy PUBLIC CUDA::cublas CUDA::cudart m) +target_compile_features(saxpy PRIVATE cxx_std_14) +target_compile_options(saxpy PRIVATE $<$: + --expt-relaxed-constexpr>) + +install(TARGETS saxpy) diff --git a/pkgs/development/cuda-modules/saxpy/default.nix b/pkgs/development/cuda-modules/saxpy/default.nix new file mode 100644 index 0000000000000..fff52801e3cca --- /dev/null +++ b/pkgs/development/cuda-modules/saxpy/default.nix @@ -0,0 +1,56 @@ +{ + cmake, + cudaPackages, + lib, +}: +let + inherit (cudaPackages) + autoAddOpenGLRunpathHook + backendStdenv + cuda_cccl + cuda_cudart + cuda_nvcc + cudatoolkit + cudaVersion + flags + libcublas + ; +in +backendStdenv.mkDerivation { + pname = "saxpy"; + version = "unstable-2023-07-11"; + + src = ./.; + + strictDeps = true; + + nativeBuildInputs = + [ + cmake + autoAddOpenGLRunpathHook + ] + ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] + ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc]; + + buildInputs = + lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] + ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ + libcublas + cuda_cudart + ] + ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [cuda_cccl]; + + cmakeFlags = [ + (lib.cmakeBool "CMAKE_VERBOSE_MAKEFILE" true) + (lib.cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( + with flags; lib.concatStringsSep ";" (lib.lists.map dropDot cudaCapabilities) + )) + ]; + + meta = { + description = "A simple (Single-precision AX Plus Y) FindCUDAToolkit.cmake example for testing cross-compilation"; + license = lib.licenses.mit; + maintainers = lib.teams.cuda.members; + platforms = lib.platforms.unix; + }; +} diff --git a/pkgs/development/cuda-modules/saxpy/saxpy.cu b/pkgs/development/cuda-modules/saxpy/saxpy.cu new file mode 100644 index 0000000000000..912a6d1647b14 --- /dev/null +++ b/pkgs/development/cuda-modules/saxpy/saxpy.cu @@ -0,0 +1,68 @@ +#include +#include +#include + +#include + +static inline void check(cudaError_t err, const char *context) { + if (err != cudaSuccess) { + fprintf(stderr, "CUDA error at %s: %s\n", context, cudaGetErrorString(err)); + std::exit(EXIT_FAILURE); + } +} + +#define CHECK(x) check(x, #x) + +__global__ void saxpy(int n, float a, float *x, float *y) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i < n) + y[i] = a * x[i] + y[i]; +} + +int main(void) { + setbuf(stderr, NULL); + fprintf(stderr, "Start\n"); + + int rtVersion, driverVersion; + CHECK(cudaRuntimeGetVersion(&rtVersion)); + CHECK(cudaDriverGetVersion(&driverVersion)); + + fprintf(stderr, "Runtime version: %d\n", rtVersion); + fprintf(stderr, "Driver version: %d\n", driverVersion); + + constexpr int N = 1 << 10; + + std::vector xHost(N), yHost(N); + for (int i = 0; i < N; i++) { + xHost[i] = 1.0f; + yHost[i] = 2.0f; + } + + fprintf(stderr, "Host memory initialized, copying to the device\n"); + 
fflush(stderr); + + float *xDevice, *yDevice; + CHECK(cudaMalloc(&xDevice, N * sizeof(float))); + CHECK(cudaMalloc(&yDevice, N * sizeof(float))); + + CHECK(cudaMemcpy(xDevice, xHost.data(), N * sizeof(float), + cudaMemcpyHostToDevice)); + CHECK(cudaMemcpy(yDevice, yHost.data(), N * sizeof(float), + cudaMemcpyHostToDevice)); + fprintf(stderr, "Scheduled a cudaMemcpy, calling the kernel\n"); + + saxpy<<<(N + 255) / 256, 256>>>(N, 2.0f, xDevice, yDevice); + fprintf(stderr, "Scheduled a kernel call\n"); + CHECK(cudaGetLastError()); + + CHECK(cudaMemcpy(yHost.data(), yDevice, N * sizeof(float), + cudaMemcpyDeviceToHost)); + + float maxError = 0.0f; + for (int i = 0; i < N; i++) + maxError = max(maxError, abs(yHost[i] - 4.0f)); + fprintf(stderr, "Max error: %f\n", maxError); + + CHECK(cudaFree(xDevice)); + CHECK(cudaFree(yDevice)); +} diff --git a/pkgs/development/cuda-modules/setup-hooks/auto-add-opengl-runpath-hook.sh b/pkgs/development/cuda-modules/setup-hooks/auto-add-opengl-runpath-hook.sh new file mode 100644 index 0000000000000..f50a5f6c25c66 --- /dev/null +++ b/pkgs/development/cuda-modules/setup-hooks/auto-add-opengl-runpath-hook.sh @@ -0,0 +1,28 @@ +# shellcheck shell=bash +# Run addOpenGLRunpath on all dynamically linked, ELF files +echo "Sourcing auto-add-opengl-runpath-hook" + +elfHasDynamicSection() { + patchelf --print-rpath "$1" >& /dev/null +} + +autoAddOpenGLRunpathPhase() ( + local outputPaths + mapfile -t outputPaths < <(for o in $(getAllOutputNames); do echo "${!o}"; done) + find "${outputPaths[@]}" -type f -executable -print0 | while IFS= read -rd "" f; do + if isELF "$f"; then + # patchelf returns an error on statically linked ELF files + if elfHasDynamicSection "$f" ; then + echo "autoAddOpenGLRunpathHook: patching $f" + addOpenGLRunpath "$f" + elif (( "${NIX_DEBUG:-0}" >= 1 )) ; then + echo "autoAddOpenGLRunpathHook: skipping a statically-linked ELF file $f" + fi + fi + done +) + +if [ -z "${dontUseAutoAddOpenGLRunpath-}" ]; then + echo "Using autoAddOpenGLRunpathPhase" + postFixupHooks+=(autoAddOpenGLRunpathPhase) +fi diff --git a/pkgs/development/cuda-modules/setup-hooks/extension.nix b/pkgs/development/cuda-modules/setup-hooks/extension.nix new file mode 100644 index 0000000000000..762dad9ea8765 --- /dev/null +++ b/pkgs/development/cuda-modules/setup-hooks/extension.nix @@ -0,0 +1,47 @@ +final: _: { + # Internal hook, used by cudatoolkit and cuda redist packages + # to accommodate automatic CUDAToolkit_ROOT construction + markForCudatoolkitRootHook = + final.callPackage + ( + {makeSetupHook}: + makeSetupHook {name = "mark-for-cudatoolkit-root-hook";} ./mark-for-cudatoolkit-root-hook.sh + ) + {}; + + # Currently propagated by cuda_nvcc or cudatoolkit, rather than used directly + setupCudaHook = + (final.callPackage + ( + {makeSetupHook, backendStdenv}: + makeSetupHook + { + name = "setup-cuda-hook"; + + substitutions.setupCudaHook = placeholder "out"; + + # Point NVCC at a compatible compiler + substitutions.ccRoot = "${backendStdenv.cc}"; + + # Required in addition to ccRoot as otherwise bin/gcc is looked up + # when building CMakeCUDACompilerId.cu + substitutions.ccFullPath = "${backendStdenv.cc}/bin/${backendStdenv.cc.targetPrefix}c++"; + } + ./setup-cuda-hook.sh + ) + {} + ); + + autoAddOpenGLRunpathHook = + final.callPackage + ( + {addOpenGLRunpath, makeSetupHook}: + makeSetupHook + { + name = "auto-add-opengl-runpath-hook"; + propagatedBuildInputs = [addOpenGLRunpath]; + } + ./auto-add-opengl-runpath-hook.sh + ) + {}; +} diff --git 
a/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh
new file mode 100644
index 0000000000000..ba04c2e0806af
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/mark-for-cudatoolkit-root-hook.sh
@@ -0,0 +1,14 @@
+# shellcheck shell=bash
+
+# Should we mimic cc-wrapper's "hygiene"?
+[[ -z ${strictDeps-} ]] || (( "$hostOffset" < 0 )) || return 0
+
+echo "Sourcing mark-for-cudatoolkit-root-hook" >&2
+
+markForCUDAToolkit_ROOT() {
+    mkdir -p "${prefix}/nix-support"
+    [[ -f "${prefix}/nix-support/include-in-cudatoolkit-root" ]] && return
+    echo "$pname-$output" > "${prefix}/nix-support/include-in-cudatoolkit-root"
+}
+
+fixupOutputHooks+=(markForCUDAToolkit_ROOT)
diff --git a/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh
new file mode 100644
index 0000000000000..7b7b3bdde80e3
--- /dev/null
+++ b/pkgs/development/cuda-modules/setup-hooks/setup-cuda-hook.sh
@@ -0,0 +1,139 @@
+# shellcheck shell=bash
+
+# Only run the hook from nativeBuildInputs
+(( "$hostOffset" == -1 && "$targetOffset" == 0)) || return 0
+
+guard=Sourcing
+reason=
+
+[[ -n ${cudaSetupHookOnce-} ]] && guard=Skipping && reason=" because the hook has been propagated more than once"
+
+if (( "${NIX_DEBUG:-0}" >= 1 )) ; then
+    echo "$guard hostOffset=$hostOffset targetOffset=$targetOffset setupCudaHook$reason" >&2
+else
+    echo "$guard setup-cuda-hook$reason" >&2
+fi
+
+[[ "$guard" = Sourcing ]] || return 0
+
+declare -g cudaSetupHookOnce=1
+declare -Ag cudaHostPathsSeen=()
+declare -Ag cudaOutputToPath=()
+
+extendcudaHostPathsSeen() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "extendcudaHostPathsSeen $1" >&2
+
+    local markerPath="$1/nix-support/include-in-cudatoolkit-root"
+    [[ ! -f "${markerPath}" ]] && return
+    [[ -v cudaHostPathsSeen[$1] ]] && return
+
+    cudaHostPathsSeen["$1"]=1
+
+    # E.g. cuda_cudart-lib
+    local cudaOutputName
+    read -r cudaOutputName < "$markerPath"
+
+    [[ -z "$cudaOutputName" ]] && return
+
+    local oldPath="${cudaOutputToPath[$cudaOutputName]-}"
+    [[ -n "$oldPath" ]] && echo "extendcudaHostPathsSeen: warning: overwriting $cudaOutputName from $oldPath to $1" >&2
+    cudaOutputToPath["$cudaOutputName"]="$1"
+}
+addEnvHooks "$targetOffset" extendcudaHostPathsSeen
+
+setupCUDAToolkit_ROOT() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "setupCUDAToolkit_ROOT: cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
+
+    for path in "${!cudaHostPathsSeen[@]}" ; do
+        addToSearchPathWithCustomDelimiter ";" CUDAToolkit_ROOT "$path"
+        if [[ -d "$path/include" ]] ; then
+            addToSearchPathWithCustomDelimiter ";" CUDAToolkit_INCLUDE_DIR "$path/include"
+        fi
+    done
+
+    export cmakeFlags+=" -DCUDAToolkit_INCLUDE_DIR=$CUDAToolkit_INCLUDE_DIR -DCUDAToolkit_ROOT=$CUDAToolkit_ROOT"
+}
+preConfigureHooks+=(setupCUDAToolkit_ROOT)
+
+setupCUDAToolkitCompilers() {
+    echo Executing setupCUDAToolkitCompilers >&2
+
+    if [[ -n "${dontSetupCUDAToolkitCompilers-}" ]] ; then
+        return
+    fi
+
+    # Point NVCC at a compatible compiler
+
+    # For CMake-based projects:
+    # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
+    # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html
+    # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html
+
+    export cmakeFlags+=" -DCUDA_HOST_COMPILER=@ccFullPath@"
+    export cmakeFlags+=" -DCMAKE_CUDA_HOST_COMPILER=@ccFullPath@"
+
+    # For non-CMake projects:
+    # We prepend --compiler-bindir to nvcc flags.
+    # Downstream packages can override these, because NVCC
+    # uses the last --compiler-bindir it gets on the command line.
+    # FIXME: this results in "incompatible redefinition" warnings.
+    # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
+    if [ -z "${CUDAHOSTCXX-}" ]; then
+        export CUDAHOSTCXX="@ccFullPath@";
+    fi
+
+    export NVCC_PREPEND_FLAGS+=" --compiler-bindir=@ccRoot@/bin"
+
+    # NOTE: We set -Xfatbin=-compress-all, which reduces the size of the compiled
+    # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as
+    # the default set of CUDA capabilities we build can regularly cause this to occur (for
+    # example, with Magma).
+    #
+    # @SomeoneSerge: original comment was made by @ConnorBaker in .../cudatoolkit/common.nix
+    if [[ -z "${dontCompressFatbin-}" ]]; then
+        export NVCC_PREPEND_FLAGS+=" -Xfatbin=-compress-all"
+    fi
+
+    # CMake's enable_language(CUDA) runs a compiler test and it doesn't account for
+    # CUDAToolkit_ROOT. We have to help it locate libcudart
+    if [[ -z "${nvccDontPrependCudartFlags-}" ]] ; then
+        if [[ ! -v cudaOutputToPath["cuda_cudart-out"] ]] ; then
+            echo "setupCUDAToolkitCompilers: missing cudaPackages.cuda_cudart. This may become an error in the future" >&2
+            # exit 1
+        fi
+        for pkg in "${!cudaOutputToPath[@]}" ; do
+            [[ ! "$pkg" = cuda_cudart* ]] && continue
+
+            local path="${cudaOutputToPath[$pkg]}"
+            if [[ -d "$path/include" ]] ; then
+                export NVCC_PREPEND_FLAGS+=" -I$path/include"
+            fi
+            if [[ -d "$path/lib" ]] ; then
+                export NVCC_PREPEND_FLAGS+=" -L$path/lib"
+            fi
+        done
+    fi
+}
+preConfigureHooks+=(setupCUDAToolkitCompilers)
+
+propagateCudaLibraries() {
+    (( "${NIX_DEBUG:-0}" >= 1 )) && echo "propagateCudaLibraries: cudaPropagateToOutput=$cudaPropagateToOutput cudaHostPathsSeen=${!cudaHostPathsSeen[*]}" >&2
+
+    [[ -z "${cudaPropagateToOutput-}" ]] && return
+
+    mkdir -p "${!cudaPropagateToOutput}/nix-support"
+    # One would expect this to be propagated-build-build-deps, but that doesn't seem to work
+    echo "@setupCudaHook@" >> "${!cudaPropagateToOutput}/nix-support/propagated-native-build-inputs"

+    local propagatedBuildInputs=( "${!cudaHostPathsSeen[@]}" )
+    for output in $(getAllOutputNames) ; do
+        if [[ ! "$output" = "$cudaPropagateToOutput" ]] ; then
+            propagatedBuildInputs+=( "${!output}" )
+        fi
+        break
+    done
+
+    # One would expect this to be propagated-host-host-deps, but that doesn't seem to work
+    printWords "${propagatedBuildInputs[@]}" >> "${!cudaPropagateToOutput}/nix-support/propagated-build-inputs"
+}
+postFixupHooks+=(propagateCudaLibraries)
diff --git a/pkgs/development/cuda-modules/tensorrt/fixup.nix b/pkgs/development/cuda-modules/tensorrt/fixup.nix
new file mode 100644
index 0000000000000..d713189328ed7
--- /dev/null
+++ b/pkgs/development/cuda-modules/tensorrt/fixup.nix
@@ -0,0 +1,113 @@
+{
+  cudaVersion,
+  final,
+  hostPlatform,
+  lib,
+  mkVersionedPackageName,
+  package,
+  patchelf,
+  requireFile,
+  ...
+}:
+let
+  inherit (lib)
+    maintainers
+    meta
+    strings
+    versions
+    ;
+in
+finalAttrs: prevAttrs: {
+  # Useful for inspecting why something went wrong.
+  brokenConditions =
+    let
+      cudaTooOld = strings.versionOlder cudaVersion package.minCudaVersion;
+      cudaTooNew =
+        (package.maxCudaVersion != null) && strings.versionOlder package.maxCudaVersion cudaVersion;
+      cudnnVersionIsSpecified = package.cudnnVersion != null;
+      cudnnVersionSpecified = versions.majorMinor package.cudnnVersion;
+      cudnnVersionProvided = versions.majorMinor finalAttrs.passthru.cudnn.version;
+      cudnnTooOld =
+        cudnnVersionIsSpecified && (strings.versionOlder cudnnVersionProvided cudnnVersionSpecified);
+      cudnnTooNew =
+        cudnnVersionIsSpecified && (strings.versionOlder cudnnVersionSpecified cudnnVersionProvided);
+    in
+    prevAttrs.brokenConditions
+    // {
+      "CUDA version is too old" = cudaTooOld;
+      "CUDA version is too new" = cudaTooNew;
+      "CUDNN version is too old" = cudnnTooOld;
+      "CUDNN version is too new" = cudnnTooNew;
+    };
+
+  src = requireFile {
+    name = package.filename;
+    inherit (package) hash;
+    message = ''
+      To use the TensorRT derivation, you must join the NVIDIA Developer Program and
+      download the ${package.version} TAR package for CUDA ${cudaVersion} from
+      ${finalAttrs.meta.homepage}.
+
+      Once you have downloaded the file, add it to the store with the following
+      command, and try building this derivation again.
+
+      $ nix-store --add-fixed sha256 ${package.filename}
+    '';
+  };
+
+  # We need to look inside the extracted output to get the files we need.
+ sourceRoot = "TensorRT-${finalAttrs.version}"; + + buildInputs = prevAttrs.buildInputs ++ [finalAttrs.passthru.cudnn.lib]; + + preInstall = + let + targetArch = + if hostPlatform.isx86_64 then + "x86_64-linux-gnu" + else if hostPlatform.isAarch64 then + "aarch64-linux-gnu" + else + throw "Unsupported architecture"; + in + (prevAttrs.preInstall or "") + + '' + # Replace symlinks to bin and lib with the actual directories from targets. + for dir in bin lib; do + rm "$dir" + mv "targets/${targetArch}/$dir" "$dir" + done + ''; + + # Tell autoPatchelf about runtime dependencies. + postFixup = + let + versionTriple = "${versions.majorMinor finalAttrs.version}.${versions.patch finalAttrs.version}"; + in + (prevAttrs.postFixup or "") + + '' + ${meta.getExe' patchelf "patchelf"} --add-needed libnvinfer.so \ + "$lib/lib/libnvinfer.so.${versionTriple}" \ + "$lib/lib/libnvinfer_plugin.so.${versionTriple}" \ + "$lib/lib/libnvinfer_builder_resource.so.${versionTriple}" + ''; + + passthru = { + useCudatoolkitRunfile = strings.versionOlder cudaVersion "11.3.999"; + # The CUDNN used with TensorRT. + # If null, the default cudnn derivation will be used. + # If a version is specified, the cudnn derivation with that version will be used, + # unless it is not available, in which case the default cudnn derivation will be used. + cudnn = + let + desiredName = mkVersionedPackageName "cudnn" package.cudnnVersion; + desiredIsAvailable = final ? desiredName; + in + if package.cudnnVersion == null || !desiredIsAvailable then final.cudnn else final.${desiredName}; + }; + + meta = prevAttrs.meta // { + homepage = "https://developer.nvidia.com/tensorrt"; + maintainers = prevAttrs.meta.maintainers ++ [maintainers.aidalgol]; + }; +} diff --git a/pkgs/development/cuda-modules/tensorrt/releases.nix b/pkgs/development/cuda-modules/tensorrt/releases.nix index b5cae4aca4e1c..d6a1f0487dd43 100644 --- a/pkgs/development/cuda-modules/tensorrt/releases.nix +++ b/pkgs/development/cuda-modules/tensorrt/releases.nix @@ -1,4 +1,5 @@ # NOTE: Check https://developer.nvidia.com/nvidia-tensorrt-8x-download. +# Version policy is to keep the latest minor release for each major release. 
{ tensorrt.releases = { # jetson @@ -112,7 +113,7 @@ version = "8.6.1.6"; minCudaVersion = "11.0"; maxCudaVersion = "11.8"; - cudnnVersion = null; + cudnnVersion = "8.9"; filename = "TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz"; hash = "sha256-Fb/mBT1F/uxF7McSOpEGB2sLQ/oENfJC2J3KB3gzd1k="; } @@ -120,7 +121,7 @@ version = "8.6.1.6"; minCudaVersion = "12.0"; maxCudaVersion = "12.1"; - cudnnVersion = null; + cudnnVersion = "8.9"; filename = "TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-12.0.tar.gz"; hash = "sha256-D4FXpfxTKZQ7M4uJNZE3M1CvqQyoEjnNrddYDNHrolQ="; } diff --git a/pkgs/development/cuda-modules/tensorrt/shims.nix b/pkgs/development/cuda-modules/tensorrt/shims.nix new file mode 100644 index 0000000000000..8be3e7988bb34 --- /dev/null +++ b/pkgs/development/cuda-modules/tensorrt/shims.nix @@ -0,0 +1,16 @@ +# Shims to mimic the shape of ../modules/generic/manifests/{feature,redistrib}/release.nix +{package, redistArch}: +{ + featureRelease.${redistArch}.outputs = { + bin = true; + lib = true; + static = true; + dev = true; + sample = true; + python = true; + }; + redistribRelease = { + name = "TensorRT: a high-performance deep learning interface"; + inherit (package) version; + }; +} diff --git a/pkgs/development/libraries/science/math/cudnn/extension.nix b/pkgs/development/libraries/science/math/cudnn/extension.nix deleted file mode 100644 index d4c83428980d3..0000000000000 --- a/pkgs/development/libraries/science/math/cudnn/extension.nix +++ /dev/null @@ -1,66 +0,0 @@ -# Support matrix can be found at -# https://docs.nvidia.com/deeplearning/cudnn/archives/cudnn-880/support-matrix/index.html -# Type aliases -# Release = { -# version: String, -# minCudaVersion: String, -# maxCudaVersion: String, -# url: String, -# hash: String, -# } -final: prev: let - inherit (final) callPackage; - inherit (prev) cudaVersion; - inherit (prev.lib) attrsets lists versions; - inherit (prev.lib.strings) replaceStrings versionAtLeast versionOlder; - - # Compute versioned attribute name to be used in this package set - # Patch version changes should not break the build, so we only use major and minor - # computeName :: String -> String - computeName = version: "cudnn_${replaceStrings ["."] ["_"] (versions.majorMinor version)}"; - - # Check whether a CUDNN release supports our CUDA version - # Thankfully we're able to do lexicographic comparison on the version strings - # isSupported :: Release -> Bool - isSupported = release: - versionAtLeast cudaVersion release.minCudaVersion - && versionAtLeast release.maxCudaVersion cudaVersion; - - # useCudatoolkitRunfile :: Bool - useCudatoolkitRunfile = versionOlder cudaVersion "11.3.999"; - - # buildCuDnnPackage :: Release -> Derivation - buildCuDnnPackage = callPackage ./generic.nix {inherit useCudatoolkitRunfile;}; - - # Reverse the list to have the latest release first - # cudnnReleases :: List Release - cudnnReleases = lists.reverseList (builtins.import ./releases.nix); - - # Check whether a CUDNN release supports our CUDA version - # supportedReleases :: List Release - supportedReleases = builtins.filter isSupported cudnnReleases; - - # Function to transform our releases into build attributes - # toBuildAttrs :: Release -> { name: String, value: Derivation } - toBuildAttrs = release: { - name = computeName release.version; - value = buildCuDnnPackage release; - }; - - # Add all supported builds as attributes - # allBuilds :: AttrSet String Derivation - allBuilds = builtins.listToAttrs (builtins.map toBuildAttrs supportedReleases); - - defaultBuild = 
attrsets.optionalAttrs (supportedReleases != []) { - cudnn = let - # The latest release is the first element of the list and will be our default choice - # latestReleaseName :: String - latestReleaseName = computeName (builtins.head supportedReleases).version; - in - allBuilds.${latestReleaseName}; - }; - - # builds :: AttrSet String Derivation - builds = allBuilds // defaultBuild; -in - builds diff --git a/pkgs/development/libraries/science/math/cudnn/generic.nix b/pkgs/development/libraries/science/math/cudnn/generic.nix deleted file mode 100644 index b9f101d80fa30..0000000000000 --- a/pkgs/development/libraries/science/math/cudnn/generic.nix +++ /dev/null @@ -1,170 +0,0 @@ -{ stdenv, - backendStdenv, - lib, - lndir, - zlib, - useCudatoolkitRunfile ? false, - cudaVersion, - cudaMajorVersion, - cudatoolkit, # For cuda < 11 - libcublas ? null, # cuda <11 doesn't ship redist packages - autoPatchelfHook, - autoAddOpenGLRunpathHook, - fetchurl, -}: { - version, - url, - hash, - minCudaVersion, - maxCudaVersion, -}: -assert useCudatoolkitRunfile || (libcublas != null); let - inherit (lib) lists strings trivial versions; - - # majorMinorPatch :: String -> String - majorMinorPatch = (trivial.flip trivial.pipe) [ - (versions.splitVersion) - (lists.take 3) - (strings.concatStringsSep ".") - ]; - - # versionTriple :: String - # Version with three components: major.minor.patch - versionTriple = majorMinorPatch version; -in - backendStdenv.mkDerivation { - pname = "cudatoolkit-${cudaMajorVersion}-cudnn"; - version = versionTriple; - strictDeps = true; - outputs = ["out" "lib" "static" "dev"]; - - src = fetchurl { - inherit url hash; - }; - - # We do need some other phases, like configurePhase, so the multiple-output setup hook works. - dontBuild = true; - - # Check and normalize Runpath against DT_NEEDED using autoPatchelf. - # Prepend /run/opengl-driver/lib using addOpenGLRunpath for dlopen("libcudacuda.so") - nativeBuildInputs = [ - autoPatchelfHook - autoAddOpenGLRunpathHook - ]; - - # Used by autoPatchelfHook - buildInputs = [ - # Note this libstdc++ isn't from the (possibly older) nvcc-compatible - # stdenv, but from the (newer) stdenv that the rest of nixpkgs uses - stdenv.cc.cc.lib - - zlib - ] ++ lists.optionals useCudatoolkitRunfile [ - cudatoolkit - ] ++ lists.optionals (!useCudatoolkitRunfile) [ - libcublas.lib - ]; - - # We used to patch Runpath here, but now we use autoPatchelfHook - # - # Note also that version <=8.3.0 contained a subdirectory "lib64/" but in - # version 8.3.2 it seems to have been renamed to simply "lib/". - # - # doc and dev have special output handling. Other outputs need to be moved to their own - # output. - # Note that moveToOutput operates on all outputs: - # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L105-L107 - installPhase = - '' - runHook preInstall - - mkdir -p "$out" - mv * "$out" - moveToOutput "lib64" "$lib" - moveToOutput "lib" "$lib" - moveToOutput "**/*.a" "$static" - - runHook postInstall - ''; - - # Without --add-needed autoPatchelf forgets $ORIGIN on cuda>=8.0.5. 
- postFixup = strings.optionalString (strings.versionAtLeast versionTriple "8.0.5") '' - patchelf $lib/lib/libcudnn.so --add-needed libcudnn_cnn_infer.so - patchelf $lib/lib/libcudnn_ops_infer.so --add-needed libcublas.so --add-needed libcublasLt.so - ''; - - # The out output leverages the same functionality which backs the `symlinkJoin` function in - # Nixpkgs: - # https://github.com/NixOS/nixpkgs/blob/d8b2a92df48f9b08d68b0132ce7adfbdbc1fbfac/pkgs/build-support/trivial-builders/default.nix#L510 - # - # That should allow us to emulate "fat" default outputs without having to actually create them. - # - # It is important that this run after the autoPatchelfHook, otherwise the symlinks in out will reference libraries in lib, creating a circular dependency. - postPhases = ["postPatchelf"]; - # For each output, create a symlink to it in the out output. - # NOTE: We must recreate the out output here, because the setup hook will have deleted it - # if it was empty. - # NOTE: Do not use optionalString based on whether `outputs` contains only `out` -- phases - # which are empty strings are skipped/unset and result in errors of the form "command not - # found: ". - postPatchelf = '' - mkdir -p "$out" - ${lib.meta.getExe lndir} "$lib" "$out" - ${lib.meta.getExe lndir} "$static" "$out" - ${lib.meta.getExe lndir} "$dev" "$out" - ''; - - passthru = { - inherit useCudatoolkitRunfile; - - cudatoolkit = - trivial.warn - '' - cudnn.cudatoolkit passthru attribute is deprecated; - if your derivation uses cudnn directly, it should probably consume cudaPackages instead - '' - cudatoolkit; - - majorVersion = versions.major versionTriple; - }; - - # Setting propagatedBuildInputs to false will prevent outputs known to the multiple-outputs - # from depending on `out` by default. - # https://github.com/NixOS/nixpkgs/blob/2920b6fc16a9ed5d51429e94238b28306ceda79e/pkgs/build-support/setup-hooks/multiple-outputs.sh#L196 - # Indeed, we want to do the opposite -- fat "out" outputs that contain all the other outputs. - propagatedBuildOutputs = false; - - # By default, if the dev output exists it just uses that. - # However, because we disabled propagatedBuildOutputs, dev doesn't contain libraries or - # anything of the sort. To remedy this, we set outputSpecified to true, and use - # outputsToInstall, which tells Nix which outputs to use when the package name is used - # unqualified (that is, without an explicit output). - outputSpecified = true; - - meta = with lib; { - # Check that the cudatoolkit version satisfies our min/max constraints (both - # inclusive). We mark the package as broken if it fails to satisfies the - # official version constraints (as recorded in default.nix). In some cases - # you _may_ be able to smudge version constraints, just know that you're - # embarking into unknown and unsupported territory when doing so. 
- broken = - strings.versionOlder cudaVersion minCudaVersion - || strings.versionOlder maxCudaVersion cudaVersion; - description = "NVIDIA CUDA Deep Neural Network library (cuDNN)"; - homepage = "https://developer.nvidia.com/cudnn"; - sourceProvenance = with sourceTypes; [binaryNativeCode]; - license = { - shortName = "cuDNN EULA"; - fullName = "NVIDIA cuDNN Software License Agreement (EULA)"; - url = "https://docs.nvidia.com/deeplearning/sdk/cudnn-sla/index.html#supplement"; - free = false; - } // lib.optionalAttrs (!useCudatoolkitRunfile) { - redistributable = true; - }; - platforms = ["x86_64-linux"]; - maintainers = with maintainers; [mdaiter samuela]; - # Force the use of the default, fat output by default (even though `dev` exists, which - # causes Nix to prefer that output over the others if outputSpecified isn't set). - outputsToInstall = ["out"]; - }; - } diff --git a/pkgs/development/libraries/science/math/cutensor/generic.nix b/pkgs/development/libraries/science/math/cutensor/generic.nix deleted file mode 100644 index 02fe13851620b..0000000000000 --- a/pkgs/development/libraries/science/math/cutensor/generic.nix +++ /dev/null @@ -1,88 +0,0 @@ -{ stdenv -, lib -, libPath -, cuda_cudart -, cudaMajorVersion -, cuda_nvcc -, cudatoolkit -, libcublas -, fetchurl -, autoPatchelfHook -, addOpenGLRunpath - -, version -, hash -}: - -let - mostOfVersion = builtins.concatStringsSep "." - (lib.take 3 (lib.versions.splitVersion version)); - platform = "${stdenv.hostPlatform.parsed.kernel.name}-${stdenv.hostPlatform.parsed.cpu.name}"; -in - -stdenv.mkDerivation { - pname = "cutensor-cu${cudaMajorVersion}"; - inherit version; - - src = fetchurl { - url = if lib.versionOlder mostOfVersion "1.3.3" - then "https://developer.download.nvidia.com/compute/cutensor/${mostOfVersion}/local_installers/libcutensor-${platform}-${version}.tar.gz" - else "https://developer.download.nvidia.com/compute/cutensor/redist/libcutensor/${platform}/libcutensor-${platform}-${version}-archive.tar.xz"; - inherit hash; - }; - - outputs = [ "out" "dev" ]; - - nativeBuildInputs = [ - autoPatchelfHook - addOpenGLRunpath - cuda_nvcc - ]; - - buildInputs = [ - stdenv.cc.cc.lib - cuda_cudart - libcublas - ]; - - # Set RUNPATH so that libcuda in /run/opengl-driver(-32)/lib can be found. - # See the explanation in addOpenGLRunpath. - installPhase = '' - mkdir -p "$out" "$dev" - - if [[ ! 
-d "${libPath}" ]] ; then - echo "Cutensor: ${libPath} does not exist, only found:" >&2 - find "$(dirname ${libPath})"/ -maxdepth 1 >&2 - echo "This cutensor release might not support your cudatoolkit version" >&2 - exit 1 - fi - - mv include "$dev" - mv ${libPath} "$out/lib" - - function finalRPathFixups { - for lib in $out/lib/lib*.so; do - addOpenGLRunpath $lib - done - } - postFixupHooks+=(finalRPathFixups) - ''; - - passthru = { - cudatoolkit = lib.warn "cutensor.passthru: cudaPackages.cudatoolkit is deprecated" cudatoolkit; - majorVersion = lib.versions.major version; - }; - - meta = with lib; { - description = "cuTENSOR: A High-Performance CUDA Library For Tensor Primitives"; - homepage = "https://developer.nvidia.com/cutensor"; - sourceProvenance = with sourceTypes; [ binaryNativeCode ]; - license = licenses.unfreeRedistributable // { - shortName = "cuTENSOR EULA"; - name = "cuTENSOR SUPPLEMENT TO SOFTWARE LICENSE AGREEMENT FOR NVIDIA SOFTWARE DEVELOPMENT KITS"; - url = "https://docs.nvidia.com/cuda/cutensor/license.html"; - }; - platforms = [ "x86_64-linux" ]; - maintainers = with maintainers; [ obsidian-systems-maintenance ]; - }; -} diff --git a/pkgs/development/libraries/science/math/nccl/default.nix b/pkgs/development/libraries/science/math/nccl/default.nix deleted file mode 100644 index d877e19a6dd68..0000000000000 --- a/pkgs/development/libraries/science/math/nccl/default.nix +++ /dev/null @@ -1,113 +0,0 @@ -# NOTE: Though NCCL is called within the cudaPackages package set, we avoid passing in -# the names of dependencies from that package set directly to avoid evaluation errors -# in the case redistributable packages are not available. -{ - lib, - fetchFromGitHub, - python3, - which, - cudaPackages, - # passthru.updateScript - gitUpdater, -}: -let - - inherit (cudaPackages) - autoAddOpenGLRunpathHook - backendStdenv - cuda_cccl - cuda_cudart - cuda_nvcc - cudaFlags - cudatoolkit - cudaVersion - ; -in -backendStdenv.mkDerivation ( - finalAttrs: { - pname = "nccl"; - version = "2.19.3-1"; - - src = fetchFromGitHub { - owner = "NVIDIA"; - repo = finalAttrs.pname; - rev = "v${finalAttrs.version}"; - hash = "sha256-59FlOKM5EB5Vkm4dZBRCkn+IgIcdQehE+FyZAdTCT/A="; - }; - - strictDeps = true; - - outputs = [ - "out" - "dev" - ]; - - nativeBuildInputs = - [ - which - autoAddOpenGLRunpathHook - python3 - ] - ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] - ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc]; - - buildInputs = - lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] - ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ - cuda_nvcc.dev # crt/host_config.h - cuda_cudart - ] - # NOTE: CUDA versions in Nixpkgs only use a major and minor version. When we do comparisons - # against other version, like below, it's important that we use the same format. Otherwise, - # we'll get incorrect results. - # For example, lib.versionAtLeast "12.0" "12.0.0" == false. 
- ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [cuda_cccl]; - - env.NIX_CFLAGS_COMPILE = toString ["-Wno-unused-function"]; - - preConfigure = '' - patchShebangs ./src/device/generate.py - makeFlagsArray+=( - "NVCC_GENCODE=${lib.concatStringsSep " " cudaFlags.gencode}" - ) - ''; - - makeFlags = - ["PREFIX=$(out)"] - ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [ - "CUDA_HOME=${cudatoolkit}" - "CUDA_LIB=${lib.getLib cudatoolkit}/lib" - "CUDA_INC=${lib.getDev cudatoolkit}/include" - ] - ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ - "CUDA_HOME=${cuda_nvcc}" - "CUDA_LIB=${lib.getLib cuda_cudart}/lib" - "CUDA_INC=${lib.getDev cuda_cudart}/include" - ]; - - enableParallelBuilding = true; - - postFixup = '' - moveToOutput lib/libnccl_static.a $dev - ''; - - passthru.updateScript = gitUpdater { - inherit (finalAttrs) pname version; - rev-prefix = "v"; - }; - - meta = with lib; { - description = "Multi-GPU and multi-node collective communication primitives for NVIDIA GPUs"; - homepage = "https://developer.nvidia.com/nccl"; - license = licenses.bsd3; - platforms = platforms.linux; - maintainers = - with maintainers; - [ - mdaiter - orivej - ] - ++ teams.cuda.members; - }; - } -) diff --git a/pkgs/development/libraries/science/math/nccl/tests.nix b/pkgs/development/libraries/science/math/nccl/tests.nix deleted file mode 100644 index 5c2f29b7ed564..0000000000000 --- a/pkgs/development/libraries/science/math/nccl/tests.nix +++ /dev/null @@ -1,84 +0,0 @@ -# NOTE: Though NCCL tests is called within the cudaPackages package set, we avoid passing in -# the names of dependencies from that package set directly to avoid evaluation errors -# in the case redistributable packages are not available. -{ - config, - cudaPackages, - fetchFromGitHub, - gitUpdater, - lib, - mpi, - mpiSupport ? 
false, - which, -}: -let - inherit (cudaPackages) - backendStdenv - cuda_cccl - cuda_cudart - cuda_nvcc - cudatoolkit - cudaVersion - nccl - ; -in -backendStdenv.mkDerivation ( - finalAttrs: { - - pname = "nccl-tests"; - version = "2.13.8"; - - src = fetchFromGitHub { - owner = "NVIDIA"; - repo = finalAttrs.pname; - rev = "v${finalAttrs.version}"; - hash = "sha256-dxLoflsTHDBnZRTzoXdm30OyKpLlRa73b784YWALBHg="; - }; - - strictDeps = true; - - nativeBuildInputs = - [which] - ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] - ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc]; - - buildInputs = - [nccl] - ++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit] - ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [ - cuda_nvcc.dev # crt/host_config.h - cuda_cudart - ] - ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0") [ - cuda_cccl.dev # - ] - ++ lib.optionals mpiSupport [mpi]; - - makeFlags = - ["NCCL_HOME=${nccl}"] - ++ lib.optionals (lib.versionOlder cudaVersion "11.4") ["CUDA_HOME=${cudatoolkit}"] - ++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") ["CUDA_HOME=${cuda_nvcc}"] - ++ lib.optionals mpiSupport ["MPI=1"]; - - enableParallelBuilding = true; - - installPhase = '' - mkdir -p $out/bin - cp -r build/* $out/bin/ - ''; - - passthru.updateScript = gitUpdater { - inherit (finalAttrs) pname version; - rev-prefix = "v"; - }; - - meta = with lib; { - description = "Tests to check both the performance and the correctness of NVIDIA NCCL operations"; - homepage = "https://github.com/NVIDIA/nccl-tests"; - platforms = platforms.linux; - license = licenses.bsd3; - broken = !config.cudaSupport || (mpiSupport && mpi == null); - maintainers = with maintainers; [jmillerpdt] ++ teams.cuda.members; - }; - } -) diff --git a/pkgs/development/libraries/science/math/tensorrt/generic.nix b/pkgs/development/libraries/science/math/tensorrt/generic.nix deleted file mode 100644 index 2bcdd8e588cf0..0000000000000 --- a/pkgs/development/libraries/science/math/tensorrt/generic.nix +++ /dev/null @@ -1,95 +0,0 @@ -{ lib -, backendStdenv -, requireFile -, autoPatchelfHook -, autoAddOpenGLRunpathHook -, cudaVersion -, cudatoolkit -, cudnn -}: - -{ enable ? true -, fullVersion -, fileVersionCudnn ? null -, tarball -, sha256 -, supportedCudaVersions ? [ ] -}: - -assert !enable || fileVersionCudnn == null || lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn) - "This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})"; - -backendStdenv.mkDerivation rec { - pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt"; - version = fullVersion; - src = if !enable then null else - requireFile rec { - name = tarball; - inherit sha256; - message = '' - To use the TensorRT derivation, you must join the NVIDIA Developer Program and - download the ${version} Linux x86_64 TAR package for CUDA ${cudaVersion} from - ${meta.homepage}. - - Once you have downloaded the file, add it to the store with the following - command, and try building this derivation again. 
- - $ nix-store --add-fixed sha256 ${name} - ''; - }; - - outputs = [ "out" "dev" ]; - - nativeBuildInputs = lib.optionals enable [ - autoPatchelfHook - autoAddOpenGLRunpathHook - ]; - - # Used by autoPatchelfHook - buildInputs = lib.optionals enable [ - backendStdenv.cc.cc.lib # libstdc++ - cudatoolkit - cudnn - ]; - - sourceRoot = "TensorRT-${version}"; - - installPhase = '' - install --directory "$dev" "$out" - mv include "$dev" - mv targets/x86_64-linux-gnu/lib "$out" - install -D --target-directory="$out/bin" targets/x86_64-linux-gnu/bin/trtexec - ''; - - # Tell autoPatchelf about runtime dependencies. - # (postFixup phase is run before autoPatchelfHook.) - postFixup = - let - mostOfVersion = builtins.concatStringsSep "." - (lib.take 3 (lib.versions.splitVersion version)); - in - '' - echo 'Patching RPATH of libnvinfer libs' - patchelf --debug --add-needed libnvinfer.so \ - "$out/lib/libnvinfer.so.${mostOfVersion}" \ - "$out/lib/libnvinfer_plugin.so.${mostOfVersion}" \ - "$out/lib/libnvinfer_builder_resource.so.${mostOfVersion}" - ''; - - passthru.stdenv = backendStdenv; - passthru.enable = enable; - - meta = with lib; { - # Check that the cudatoolkit version satisfies our min/max constraints (both - # inclusive). We mark the package as broken if it fails to satisfies the - # official version constraints (as recorded in default.nix). In some cases - # you _may_ be able to smudge version constraints, just know that you're - # embarking into unknown and unsupported territory when doing so. - broken = !enable || !(elem cudaVersion supportedCudaVersions); - description = "TensorRT: a high-performance deep learning interface"; - homepage = "https://developer.nvidia.com/tensorrt"; - license = licenses.unfree; - platforms = [ "x86_64-linux" ]; - maintainers = with maintainers; [ aidalgol ]; - }; -} diff --git a/pkgs/development/python-modules/tensorrt/default.nix b/pkgs/development/python-modules/tensorrt/default.nix index 475e7627e4baa..e108b1a773cba 100644 --- a/pkgs/development/python-modules/tensorrt/default.nix +++ b/pkgs/development/python-modules/tensorrt/default.nix @@ -11,7 +11,7 @@ let in buildPythonPackage rec { pname = "tensorrt"; - version = cudaPackages.tensorrt.version; + version = lib.optionalString (cudaPackages ? tensorrt) cudaPackages.tensorrt.version; src = cudaPackages.tensorrt.src; @@ -48,5 +48,8 @@ buildPythonPackage rec { license = licenses.unfree; platforms = [ "x86_64-linux" ]; maintainers = with maintainers; [ aidalgol ]; + broken = + !(cudaPackages ? tensorrt) + || !(cudaPackages ? cudnn); }; } diff --git a/pkgs/test/cuda/cuda-library-samples/extension.nix b/pkgs/test/cuda/cuda-library-samples/extension.nix index 4c721a9e9e1bc..62de715fd0b4f 100644 --- a/pkgs/test/cuda/cuda-library-samples/extension.nix +++ b/pkgs/test/cuda/cuda-library-samples/extension.nix @@ -1,3 +1,14 @@ -final: prev: { - cuda-library-samples = final.callPackage ./generic.nix { }; -} +{hostPlatform, lib}: +let + # Samples are built around the CUDA Toolkit, which is not available for + # aarch64. Check for both CUDA version and platform. 
+ platformIsSupported = hostPlatform.isx86_64; + + # Build our extension + extension = + final: _: + lib.attrsets.optionalAttrs platformIsSupported { + cuda-library-samples = final.callPackage ./generic.nix {}; + }; +in +extension diff --git a/pkgs/test/cuda/cuda-library-samples/generic.nix b/pkgs/test/cuda/cuda-library-samples/generic.nix index e9a481c94a7a4..d4182536654e1 100644 --- a/pkgs/test/cuda/cuda-library-samples/generic.nix +++ b/pkgs/test/cuda/cuda-library-samples/generic.nix @@ -1,7 +1,11 @@ -{ lib, backendStdenv, fetchFromGitHub -, cmake, addOpenGLRunpath -, cudatoolkit -, cutensor +{ + lib, + backendStdenv, + fetchFromGitHub, + cmake, + addOpenGLRunpath, + cudatoolkit, + cutensor, }: let @@ -14,8 +18,11 @@ let }; commonAttrs = { version = lib.strings.substring 0 7 rev + "-" + lib.versions.majorMinor cudatoolkit.version; - nativeBuildInputs = [ cmake addOpenGLRunpath ]; - buildInputs = [ cudatoolkit ]; + nativeBuildInputs = [ + cmake + addOpenGLRunpath + ]; + buildInputs = [cudatoolkit]; postFixup = '' for exe in $out/bin/*; do addOpenGLRunpath $exe @@ -29,43 +36,50 @@ let cuSPARSE, cuSOLVER, cuFFT, cuRAND, NPP and nvJPEG. ''; license = lib.licenses.bsd3; - maintainers = with lib.maintainers; [ obsidian-systems-maintenance ]; + maintainers = with lib.maintainers; [obsidian-systems-maintenance] ++ lib.teams.cuda.members; }; }; in { - cublas = backendStdenv.mkDerivation (commonAttrs // { - pname = "cuda-library-samples-cublas"; + cublas = backendStdenv.mkDerivation ( + commonAttrs + // { + pname = "cuda-library-samples-cublas"; - src = "${src}/cuBLASLt"; - }); + src = "${src}/cuBLASLt"; + } + ); - cusolver = backendStdenv.mkDerivation (commonAttrs // { - pname = "cuda-library-samples-cusolver"; + cusolver = backendStdenv.mkDerivation ( + commonAttrs + // { + pname = "cuda-library-samples-cusolver"; - src = "${src}/cuSOLVER"; + src = "${src}/cuSOLVER"; - sourceRoot = "cuSOLVER/gesv"; - }); + sourceRoot = "cuSOLVER/gesv"; + } + ); - cutensor = backendStdenv.mkDerivation (commonAttrs // { - pname = "cuda-library-samples-cutensor"; + cutensor = backendStdenv.mkDerivation ( + commonAttrs + // { + pname = "cuda-library-samples-cutensor"; - src = "${src}/cuTENSOR"; + src = "${src}/cuTENSOR"; - buildInputs = [ cutensor ]; + buildInputs = [cutensor]; - cmakeFlags = [ - "-DCUTENSOR_EXAMPLE_BINARY_INSTALL_DIR=${builtins.placeholder "out"}/bin" - ]; + cmakeFlags = ["-DCUTENSOR_EXAMPLE_BINARY_INSTALL_DIR=${builtins.placeholder "out"}/bin"]; - # CUTENSOR_ROOT is double escaped - postPatch = '' - substituteInPlace CMakeLists.txt \ - --replace "\''${CUTENSOR_ROOT}/include" "${cutensor.dev}/include" - ''; + # CUTENSOR_ROOT is double escaped + postPatch = '' + substituteInPlace CMakeLists.txt \ + --replace "\''${CUTENSOR_ROOT}/include" "${cutensor.dev}/include" + ''; - CUTENSOR_ROOT = cutensor; - }); + CUTENSOR_ROOT = cutensor; + } + ); } diff --git a/pkgs/test/cuda/cuda-samples/extension.nix b/pkgs/test/cuda/cuda-samples/extension.nix index 664349416b713..d41da90cd5d0e 100644 --- a/pkgs/test/cuda/cuda-samples/extension.nix +++ b/pkgs/test/cuda/cuda-samples/extension.nix @@ -1,14 +1,18 @@ -final: prev: let - - sha256 = { - "10.0" = "1zvh4xsdyc59m87brpcmssxsjlp9dkynh4asnkcmc3g94f53l0jw"; - "10.1" = "1s8ka0hznrni36ajhzf2gqpdrl8kd8fi047qijxks5l2abc093qd"; - "10.2" = "01p1innzgh9siacpld6nsqimj8jkg93rk4gj8q4crn62pa5vhd94"; - "11.0" = "1n3vjc8c7zdig2xgl5fppavrphqzhdiv9m9nk6smh4f99fwi0705"; - "11.1" = "1kjixk50i8y1bkiwbdn5lkv342crvkmbvy1xl5j3lsa1ica21kwh"; - "11.2" = 
"1p1qjvfbm28l933mmnln02rqrf0cy9kbpsyb488d1haiqzvrazl1"; - "11.3" = "0kbibb6pgz8j5iq6284axcnmycaha9bw8qlmdp6yfwmnahq1v0yz"; - "11.4" = "082dkk5y34wyvjgj2p5j1d00rk8xaxb9z0mhvz16bd469r1bw2qk"; +{ + cudaVersion, + hostPlatform, + lib, +}: +let + cudaVersionToHash = { + "10.0" = "sha256-XAI6iiPpDVbZtFoRaP1s6VKpu9aV3bwOqqkw33QncP8="; + "10.1" = "sha256-DY8E2FKCFj27jPgQEB1qE9HcLn7CfSiVGdFm+yFQE+k="; + "10.2" = "sha256-JDW4i7rC2MwIRvKRmUd6UyJZI9bWNHqZijrB962N4QY="; + "11.0" = "sha256-BRwQuUvJEVi1mTbVtGODH8Obt7rXFfq6eLH9wxCTe9g="; + "11.1" = "sha256-kM8gFItBaTpkoT34vercmQky9qTFtsXjXMGjCMrsUc4="; + "11.2" = "sha256-gX6V98dRwdAQIsvru2byDLiMswCW2lrHSBSJutyWONw="; + "11.3" = "sha256-34MdMFS2cufNbZVixFdSUDFfLeuKIGFwLBL9d81acU0="; + "11.4" = "sha256-Ewu+Qk6GtGXC37CCn1ZXHc0MQAuyXCGf3J6T4cucTSA="; "11.5" = "sha256-AKRZbke0K59lakhTi8dX2cR2aBuWPZkiQxyKaZTvHrI="; "11.6" = "sha256-AsLNmAplfuQbXg9zt09tXAuFJ524EtTYsQuUlV1tPkE="; # The tag 11.7 of cuda-samples does not exist @@ -16,10 +20,23 @@ final: prev: let "12.0" = "sha256-Lj2kbdVFrJo5xPYPMiE4BS7Z8gpU5JLKXVJhZABUe/g="; "12.1" = "sha256-xE0luOMq46zVsIEWwK4xjLs7NorcTIi9gbfZPVjIlqo="; "12.2" = "sha256-pOy0qfDjA/Nr0T9PNKKefK/63gQnJV2MQsN2g3S2yng="; + "12.3" = "sha256-fjVp0G6uRCWxsfe+gOwWTN+esZfk0O5uxS623u0REAk="; }; -in prev.lib.attrsets.optionalAttrs (builtins.hasAttr prev.cudaVersion sha256) { - cuda-samples = final.callPackage ./generic.nix { - sha256 = sha256.${prev.cudaVersion}; - }; -} + # Samples are built around the CUDA Toolkit, which is not available for + # aarch64. Check for both CUDA version and platform. + cudaVersionIsSupported = cudaVersionToHash ? ${cudaVersion}; + platformIsSupported = hostPlatform.isx86_64; + isSupported = cudaVersionIsSupported && platformIsSupported; + + # Build our extension + extension = + final: _: + lib.attrsets.optionalAttrs isSupported { + cuda-samples = final.callPackage ./generic.nix { + inherit cudaVersion; + hash = cudaVersionToHash.${cudaVersion}; + }; + }; +in +extension diff --git a/pkgs/test/cuda/cuda-samples/generic.nix b/pkgs/test/cuda/cuda-samples/generic.nix index e690f32959f2a..fb3d7cc99da95 100644 --- a/pkgs/test/cuda/cuda-samples/generic.nix +++ b/pkgs/test/cuda/cuda-samples/generic.nix @@ -1,70 +1,79 @@ -{ autoAddOpenGLRunpathHook -, backendStdenv -, cmake -, cudatoolkit -, cudaVersion -, fetchFromGitHub -, fetchpatch -, freeimage -, glfw3 -, lib -, pkg-config -, sha256 +{ + autoAddOpenGLRunpathHook, + backendStdenv, + cmake, + cudatoolkit, + cudaVersion, + fetchFromGitHub, + fetchpatch, + freeimage, + glfw3, + hash, + lib, + pkg-config, }: -backendStdenv.mkDerivation (finalAttrs: { - pname = "cuda-samples"; - version = cudaVersion; +let + inherit (lib) lists strings; +in +backendStdenv.mkDerivation ( + finalAttrs: { + strictDeps = true; - src = fetchFromGitHub { - owner = "NVIDIA"; - repo = finalAttrs.pname; - rev = "v${finalAttrs.version}"; - inherit sha256; - }; + pname = "cuda-samples"; + version = cudaVersion; - nativeBuildInputs = [ - pkg-config - autoAddOpenGLRunpathHook - glfw3 - freeimage - ] - # CMake has to run as a native, build-time dependency for libNVVM samples. - ++ lib.lists.optionals (lib.strings.versionAtLeast finalAttrs.version "12.2") [ - cmake - ]; + src = fetchFromGitHub { + owner = "NVIDIA"; + repo = finalAttrs.pname; + rev = "v${finalAttrs.version}"; + inherit hash; + }; - # CMake is not the primary build tool -- that's still make. - # As such, we disable CMake's build system. 
- dontUseCmakeConfigure = true; + nativeBuildInputs = + [ + autoAddOpenGLRunpathHook + pkg-config + ] + # CMake has to run as a native, build-time dependency for libNVVM samples. + # However, it's not the primary build tool -- that's still make. + # As such, we disable CMake's build system. + ++ lists.optionals (strings.versionAtLeast finalAttrs.version "12.2") [cmake]; - buildInputs = [ cudatoolkit ]; + dontUseCmakeConfigure = true; - # See https://github.com/NVIDIA/cuda-samples/issues/75. - patches = lib.optionals (finalAttrs.version == "11.3") [ - (fetchpatch { - url = "https://github.com/NVIDIA/cuda-samples/commit/5c3ec60faeb7a3c4ad9372c99114d7bb922fda8d.patch"; - sha256 = "sha256-0XxdmNK9MPpHwv8+qECJTvXGlFxc+fIbta4ynYprfpU="; - }) - ]; + buildInputs = [ + cudatoolkit + freeimage + glfw3 + ]; - enableParallelBuilding = true; + # See https://github.com/NVIDIA/cuda-samples/issues/75. + patches = lib.optionals (finalAttrs.version == "11.3") [ + (fetchpatch { + url = "https://github.com/NVIDIA/cuda-samples/commit/5c3ec60faeb7a3c4ad9372c99114d7bb922fda8d.patch"; + hash = "sha256-0XxdmNK9MPpHwv8+qECJTvXGlFxc+fIbta4ynYprfpU="; + }) + ]; - preConfigure = '' - export CUDA_PATH=${cudatoolkit} - ''; + enableParallelBuilding = true; - installPhase = '' - runHook preInstall + preConfigure = '' + export CUDA_PATH=${cudatoolkit} + ''; - install -Dm755 -t $out/bin bin/${backendStdenv.hostPlatform.parsed.cpu.name}/${backendStdenv.hostPlatform.parsed.kernel.name}/release/* + installPhase = '' + runHook preInstall - runHook postInstall - ''; + install -Dm755 -t $out/bin bin/${backendStdenv.hostPlatform.parsed.cpu.name}/${backendStdenv.hostPlatform.parsed.kernel.name}/release/* - meta = { - description = "Samples for CUDA Developers which demonstrates features in CUDA Toolkit"; - # CUDA itself is proprietary, but these sample apps are not. - license = lib.licenses.bsd3; - maintainers = with lib.maintainers; [ obsidian-systems-maintenance ] ++ lib.teams.cuda.members; - }; -}) + runHook postInstall + ''; + + meta = { + description = "Samples for CUDA Developers which demonstrates features in CUDA Toolkit"; + # CUDA itself is proprietary, but these sample apps are not. 
+ license = lib.licenses.bsd3; + maintainers = with lib.maintainers; [obsidian-systems-maintenance] ++ lib.teams.cuda.members; + }; + } +) diff --git a/pkgs/test/cuda/default.nix b/pkgs/test/cuda/default.nix index c7b790e35e259..be88bd3820a90 100644 --- a/pkgs/test/cuda/default.nix +++ b/pkgs/test/cuda/default.nix @@ -1,7 +1,7 @@ -{ callPackage }: +{callPackage}: rec { - cuda-samplesPackages = callPackage ./cuda-samples/generic.nix { }; + cuda-samplesPackages = callPackage ./cuda-samples/generic.nix {}; inherit (cuda-samplesPackages) cuda-samples_cudatoolkit_10 cuda-samples_cudatoolkit_10_0 @@ -12,9 +12,10 @@ rec { cuda-samples_cudatoolkit_11_1 cuda-samples_cudatoolkit_11_2 cuda-samples_cudatoolkit_11_3 - cuda-samples_cudatoolkit_11_4; + cuda-samples_cudatoolkit_11_4 + ; - cuda-library-samplesPackages = callPackage ./cuda-library-samples/generic.nix { }; + cuda-library-samplesPackages = callPackage ./cuda-library-samples/generic.nix {}; inherit (cuda-library-samplesPackages) cuda-library-samples_cudatoolkit_10 cuda-library-samples_cudatoolkit_10_1 @@ -24,5 +25,6 @@ rec { cuda-library-samples_cudatoolkit_11_1 cuda-library-samples_cudatoolkit_11_2 cuda-library-samples_cudatoolkit_11_3 - cuda-library-samples_cudatoolkit_11_4; + cuda-library-samples_cudatoolkit_11_4 + ; } diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index ecc9681d53daa..fc0332ab8ca31 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -7304,7 +7304,7 @@ with pkgs; cudaPackages_10_0 = callPackage ./cuda-packages.nix { cudaVersion = "10.0"; }; cudaPackages_10_1 = callPackage ./cuda-packages.nix { cudaVersion = "10.1"; }; cudaPackages_10_2 = callPackage ./cuda-packages.nix { cudaVersion = "10.2"; }; - cudaPackages_10 = cudaPackages_10_2; + cudaPackages_10 = recurseIntoAttrs cudaPackages_10_2; cudaPackages_11_0 = callPackage ./cuda-packages.nix { cudaVersion = "11.0"; }; cudaPackages_11_1 = callPackage ./cuda-packages.nix { cudaVersion = "11.1"; }; @@ -7315,12 +7315,13 @@ with pkgs; cudaPackages_11_6 = callPackage ./cuda-packages.nix { cudaVersion = "11.6"; }; cudaPackages_11_7 = callPackage ./cuda-packages.nix { cudaVersion = "11.7"; }; cudaPackages_11_8 = callPackage ./cuda-packages.nix { cudaVersion = "11.8"; }; - cudaPackages_11 = cudaPackages_11_8; + cudaPackages_11 = recurseIntoAttrs cudaPackages_11_8; cudaPackages_12_0 = callPackage ./cuda-packages.nix { cudaVersion = "12.0"; }; cudaPackages_12_1 = callPackage ./cuda-packages.nix { cudaVersion = "12.1"; }; cudaPackages_12_2 = callPackage ./cuda-packages.nix { cudaVersion = "12.2"; }; - cudaPackages_12 = cudaPackages_12_0; + cudaPackages_12_3 = callPackage ./cuda-packages.nix { cudaVersion = "12.3"; }; + cudaPackages_12 = recurseIntoAttrs cudaPackages_12_0; # Use the older cudaPackages for tensorflow and jax, as determined by cudnn # compatibility: https://www.tensorflow.org/install/source#gpu @@ -7328,7 +7329,7 @@ with pkgs; # TODO: try upgrading once there is a cuDNN release supporting CUDA 12. No # such cuDNN release as of 2023-01-10. 
- cudaPackages = recurseIntoAttrs cudaPackages_11; + cudaPackages = cudaPackages_11; # TODO: move to alias cudatoolkit = cudaPackages.cudatoolkit; diff --git a/pkgs/top-level/cuda-packages.nix b/pkgs/top-level/cuda-packages.nix index 3912422785bc4..5d4d2fcf29247 100644 --- a/pkgs/top-level/cuda-packages.nix +++ b/pkgs/top-level/cuda-packages.nix @@ -1,88 +1,118 @@ -{ lib -, pkgs -, cudaVersion +# Notes: +# +# Silvan (Tweag) covered some things on recursive attribute sets in the Nix Hour: +# https://www.youtube.com/watch?v=BgnUFtd1Ivs +# +# I (@connorbaker) highly recommend watching it. +# +# Most helpful comment regarding recursive attribute sets: +# +# https://github.com/NixOS/nixpkgs/pull/256324#issuecomment-1749935979 +# +# To summarize: +# +# - `prev` should only be used to access attributes which are going to be overriden. +# - `final` should only be used to access `callPackage` to build new packages. +# - Attribute names should be computable without relying on `final`. +# - Extensions should take arguments to build attribute names before relying on `final`. +# +# Silvan's recommendation then is to explicitly use `callPackage` to provide everything our extensions need +# to compute the attribute names, without relying on `final`. +# +# I've (@connorbaker) attempted to do that, though I'm unsure of how this will interact with overrides. +{ + callPackage, + cudaVersion, + lib, + newScope, + pkgs, }: - -with lib; - let - - scope = makeScope pkgs.newScope (final: { - # Here we put package set configuration and utility functions. - inherit cudaVersion; - cudaMajorVersion = versions.major final.cudaVersion; - cudaMajorMinorVersion = lib.versions.majorMinor final.cudaVersion; - inherit lib pkgs; - - addBuildInputs = drv: buildInputs: drv.overrideAttrs (oldAttrs: { - buildInputs = (oldAttrs.buildInputs or []) ++ buildInputs; - }); - }); - - cutensorExtension = final: prev: let - ### CuTensor - - buildCuTensorPackage = final.callPackage ../development/libraries/science/math/cutensor/generic.nix; - - # FIXME: Include non-x86_64 platforms - cuTensorVersions = { - "1.2.2.5" = { - hash = "sha256-lU7iK4DWuC/U3s1Ct/rq2Gr3w4F2U7RYYgpmF05bibY="; - }; - "1.5.0.3" = { - hash = "sha256-T96+lPC6OTOkIs/z3QWg73oYVSyidN0SVkBWmT9VRx0="; - }; - "2.0.0.7" = { - hash = "sha256-32M4rtGOW2rgxJUhBT0WBtKkHhh9f17M+RgK9rvE72g="; - }; - }; - - inherit (final) cudaMajorMinorVersion cudaMajorVersion; - - cudaToCutensor = { - "10" = "1.2.25"; - "11" = "1.5.0.3"; - "12" = "2.0.0.7"; - }; - - versionNewer = lib.flip lib.versionOlder; - latestVersion = (builtins.head (lib.sort versionNewer (builtins.attrNames cuTensorVersions))); - - cutensor = buildCuTensorPackage rec { - version = cudaToCutensor.${cudaMajorVersion} or latestVersion; - inherit (cuTensorVersions.${version}) hash; - # This can go into generic.nix - libPath = "lib/${if cudaMajorVersion == "10" then cudaMajorMinorVersion else cudaMajorVersion}"; - }; - in { inherit cutensor; }; - - extraPackagesExtension = final: prev: { - - nccl = final.callPackage ../development/libraries/science/math/nccl { }; - - nccl-tests = final.callPackage ../development/libraries/science/math/nccl/tests.nix { }; - - autoAddOpenGLRunpathHook = final.callPackage ( { makeSetupHook, addOpenGLRunpath }: - makeSetupHook { - name = "auto-add-opengl-runpath-hook"; - propagatedBuildInputs = [ - addOpenGLRunpath - ]; - } ../development/compilers/cudatoolkit/auto-add-opengl-runpath-hook.sh - ) {}; - - }; - - composedExtension = composeManyExtensions ([ - extraPackagesExtension - (import 
../development/compilers/cudatoolkit/extension.nix) - (import ../development/compilers/cudatoolkit/redist/extension.nix) - (import ../development/compilers/cudatoolkit/redist/overrides.nix) - (import ../development/libraries/science/math/cudnn/extension.nix) - (import ../development/libraries/science/math/tensorrt/extension.nix) - (import ../test/cuda/cuda-samples/extension.nix) - (import ../test/cuda/cuda-library-samples/extension.nix) - cutensorExtension - ]); - -in (scope.overrideScope composedExtension) + inherit (lib) + attrsets + customisation + fixedPoints + strings + versions + ; + # Backbone + gpus = builtins.import ../development/cuda-modules/gpus.nix; + nvccCompatibilities = builtins.import ../development/cuda-modules/nvcc-compatibilities.nix; + flags = callPackage ../development/cuda-modules/flags.nix {inherit cudaVersion gpus;}; + passthruFunction = + final: + ( + { + inherit cudaVersion lib pkgs; + inherit gpus nvccCompatibilities flags; + cudaMajorVersion = versions.major cudaVersion; + cudaMajorMinorVersion = versions.majorMinor cudaVersion; + + # Maintain a reference to the final cudaPackages. + # Without this, if we use `final.callPackage` and a package accepts `cudaPackages` as an argument, + # it's provided with `cudaPackages` from the top-level scope, which is not what we want. We want to + # provide the `cudaPackages` from the final scope -- that is, the *current* scope. + cudaPackages = final; + + # TODO(@connorbaker): `cudaFlags` is an alias for `flags` which should be removed in the future. + cudaFlags = flags; + + # Exposed as cudaPackages.backendStdenv. + # This is what nvcc uses as a backend, + # and it has to be an officially supported one (e.g. gcc11 for cuda11). + # + # It, however, propagates current stdenv's libstdc++ to avoid "GLIBCXX_* not found errors" + # when linked with other C++ libraries. + # E.g. for cudaPackages_11_8 we use gcc11 with gcc12's libstdc++ + # Cf. https://github.com/NixOS/nixpkgs/pull/218265 for context + backendStdenv = final.callPackage ../development/cuda-modules/backend-stdenv.nix {}; + + # Loose packages + cudatoolkit = final.callPackage ../development/cuda-modules/cudatoolkit {}; + saxpy = final.callPackage ../development/cuda-modules/saxpy {}; + } + # NCCL is not supported on Jetson, because it does not use NVLink or PCI-e for inter-GPU communication. 
+ # https://forums.developer.nvidia.com/t/can-jetson-orin-support-nccl/232845/9 + // attrsets.optionalAttrs (!flags.isJetsonBuild) { + nccl = final.callPackage ../development/cuda-modules/nccl {}; + nccl-tests = final.callPackage ../development/cuda-modules/nccl-tests {}; + } + ); + + mkVersionedPackageName = + name: version: + strings.concatStringsSep "_" [ + name + (strings.replaceStrings ["."] ["_"] (versions.majorMinor version)) + ]; + + composedExtension = fixedPoints.composeManyExtensions [ + (import ../development/cuda-modules/setup-hooks/extension.nix) + (callPackage ../development/cuda-modules/cuda/extension.nix {inherit cudaVersion;}) + (callPackage ../development/cuda-modules/cuda/overrides.nix {inherit cudaVersion;}) + (callPackage ../development/cuda-modules/generic-builders/multiplex.nix { + inherit cudaVersion flags mkVersionedPackageName; + pname = "cudnn"; + releasesModule = ../development/cuda-modules/cudnn/releases.nix; + shimsFn = ../development/cuda-modules/cudnn/shims.nix; + fixupFn = ../development/cuda-modules/cudnn/fixup.nix; + }) + (callPackage ../development/cuda-modules/cutensor/extension.nix { + inherit cudaVersion flags mkVersionedPackageName; + }) + (callPackage ../development/cuda-modules/generic-builders/multiplex.nix { + inherit cudaVersion flags mkVersionedPackageName; + pname = "tensorrt"; + releasesModule = ../development/cuda-modules/tensorrt/releases.nix; + shimsFn = ../development/cuda-modules/tensorrt/shims.nix; + fixupFn = ../development/cuda-modules/tensorrt/fixup.nix; + }) + (callPackage ../test/cuda/cuda-samples/extension.nix {inherit cudaVersion;}) + (callPackage ../test/cuda/cuda-library-samples/extension.nix {}) + ]; + + cudaPackages = customisation.makeScope newScope ( + fixedPoints.extends composedExtension passthruFunction + ); +in +cudaPackages -- cgit 1.4.1
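A note for reviewers on the naming convention described in the comments at the top of pkgs/top-level/cuda-packages.nix: attribute names are computed without consulting `final`, and `final` is only reached for via `callPackage` when building values. The sketch below is not part of the patch; the cuDNN version string and the `exampleExtension` name are purely illustrative, and only `mkVersionedPackageName` mirrors the definition in this file. It assumes `<nixpkgs>` is available on NIX_PATH.

# Illustrative sketch only; mirrors mkVersionedPackageName from pkgs/top-level/cuda-packages.nix.
let
  lib = import <nixpkgs/lib>;

  # "cudnn" + "8.9.2.26" -> "cudnn_8_9": only major.minor survives and dots become
  # underscores, so patch-level bumps do not change the attribute name.
  mkVersionedPackageName =
    name: version:
    lib.strings.concatStringsSep "_" [
      name
      (lib.strings.replaceStrings ["."] ["_"] (lib.versions.majorMinor version))
    ];

  # The attribute name is computable before `final` is touched; `final` is only used
  # (lazily) to build the attribute's value via callPackage.
  exampleExtension = final: _prev: {
    ${mkVersionedPackageName "cudnn" "8.9.2.26"} = final.callPackage ({hello}: hello) {};
  };
in
mkVersionedPackageName "cudnn" "8.9.2.26" # evaluates to "cudnn_8_9"

Evaluating the expression (for example with `nix-instantiate --eval`) should print "cudnn_8_9", the same shape as the versioned cudnn_* and tensorrt_* attributes produced by the multiplex builder in this change.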