about summary refs log tree commit diff
path: root/pkgs/development/rocm-modules/6/rocblas/default.nix
diff options
context:
space:
mode:
Diffstat (limited to 'pkgs/development/rocm-modules/6/rocblas/default.nix')
-rw-r--r--pkgs/development/rocm-modules/6/rocblas/default.nix170
1 files changed, 51 insertions, 119 deletions
diff --git a/pkgs/development/rocm-modules/6/rocblas/default.nix b/pkgs/development/rocm-modules/6/rocblas/default.nix
index 296167bb6f287..f93cceddd68af 100644
--- a/pkgs/development/rocm-modules/6/rocblas/default.nix
+++ b/pkgs/development/rocm-modules/6/rocblas/default.nix
@@ -1,7 +1,7 @@
-{ rocblas
-, lib
+{ lib
 , stdenv
 , fetchFromGitHub
+, fetchpatch
 , rocmUpdateScript
 , runCommand
 , cmake
@@ -21,57 +21,26 @@
 , buildBenchmarks ? false
 , tensileLogic ? "asm_full"
 , tensileCOVersion ? "default"
-, tensileSepArch ? true
-, tensileLazyLib ? true
+# https://github.com/ROCm/Tensile/issues/1757
+# Allows gfx101* users to use rocBLAS normally.
+# Turn the below two values to `true` after the fix has been cherry-picked
+# into a release. Just backporting that single fix is not enough because it
+# depends on some previous commits.
+, tensileSepArch ? false
+, tensileLazyLib ? false
 , tensileLibFormat ? "msgpack"
-, gpuTargets ? [ "all" ]
+# `gfx940`, `gfx941` are not present in this list because they are early
+# engineering samples, and all final MI300 hardware are `gfx942`:
+# https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130
+#
+# `gfx1012` is not present in this list because the ISA compatibility patches
+# would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will
+# always try to use `gfx1010` code objects, hence building for `gfx1012` is
+# useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152
+, gpuTargets ? [ "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" ]
 }:
 
-let
-  # NOTE: Update the default GPU targets on every update
-  gfx80 = (rocblas.override {
-    gpuTargets = [
-      "gfx803"
-    ];
-  }).overrideAttrs { pname = "rocblas-tensile-gfx80"; };
-
-  gfx90 = (rocblas.override {
-    gpuTargets = [
-      "gfx900"
-      "gfx906:xnack-"
-      "gfx908:xnack-"
-      "gfx90a:xnack+"
-      "gfx90a:xnack-"
-    ];
-  }).overrideAttrs { pname = "rocblas-tensile-gfx90"; };
-
-  gfx94 = (rocblas.override {
-    gpuTargets = [
-      "gfx940"
-      "gfx941"
-      "gfx942"
-    ];
-  }).overrideAttrs { pname = "rocblas-tensile-gfx94"; };
-
-  gfx10 = (rocblas.override {
-    gpuTargets = [
-      "gfx1010"
-      "gfx1012"
-      "gfx1030"
-    ];
-  }).overrideAttrs { pname = "rocblas-tensile-gfx10"; };
-
-  gfx11 = (rocblas.override {
-    gpuTargets = [
-      "gfx1100"
-      "gfx1101"
-      "gfx1102"
-    ];
-  }).overrideAttrs { pname = "rocblas-tensile-gfx11"; };
-
-  # Unfortunately, we have to do two full builds, otherwise we get overlapping _fallback.dat files
-  fallbacks = rocblas.overrideAttrs { pname = "rocblas-tensile-fallbacks"; };
-in stdenv.mkDerivation (finalAttrs: {
+stdenv.mkDerivation (finalAttrs: {
   pname = "rocblas";
   version = "6.0.2";
 
@@ -94,6 +63,8 @@ in stdenv.mkDerivation (finalAttrs: {
     cmake
     rocm-cmake
     clr
+  ] ++ lib.optionals buildTensile [
+    tensile
   ];
 
   buildInputs = [
@@ -114,80 +85,41 @@ in stdenv.mkDerivation (finalAttrs: {
   ];
 
   cmakeFlags = [
-    "-DCMAKE_C_COMPILER=hipcc"
-    "-DCMAKE_CXX_COMPILER=hipcc"
-    "-Dpython=python3"
-    "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
-    "-DBUILD_WITH_TENSILE=${if buildTensile then "ON" else "OFF"}"
-    # Manually define CMAKE_INSTALL_<DIR>
-    # See: https://github.com/NixOS/nixpkgs/pull/197838
-    "-DCMAKE_INSTALL_BINDIR=bin"
-    "-DCMAKE_INSTALL_LIBDIR=lib"
-    "-DCMAKE_INSTALL_INCLUDEDIR=include"
+    (lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc")
+    (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
+    (lib.cmakeFeature "python" "python3")
+    (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets))
+    (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile)
+    (lib.cmakeBool "ROCM_SYMLINK_LIBS" false)
+    (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas")
+    (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests)
+    (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks)
+    # rocblas header files are not installed unless we set this
+    (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include")
   ] ++ lib.optionals buildTensile [
-    "-DVIRTUALENV_HOME_DIR=/build/source/tensile"
-    "-DTensile_TEST_LOCAL_PATH=/build/source/tensile"
-    "-DTensile_ROOT=/build/source/tensile/${python3.sitePackages}/Tensile"
-    "-DTensile_LOGIC=${tensileLogic}"
-    "-DTensile_CODE_OBJECT_VERSION=${tensileCOVersion}"
-    "-DTensile_SEPARATE_ARCHITECTURES=${if tensileSepArch then "ON" else "OFF"}"
-    "-DTensile_LAZY_LIBRARY_LOADING=${if tensileLazyLib then "ON" else "OFF"}"
-    "-DTensile_LIBRARY_FORMAT=${tensileLibFormat}"
-  ] ++ lib.optionals buildTests [
-    "-DBUILD_CLIENTS_TESTS=ON"
-  ] ++ lib.optionals buildBenchmarks [
-    "-DBUILD_CLIENTS_BENCHMARKS=ON"
+    (lib.cmakeBool "BUILD_WITH_PIP" false)
+    (lib.cmakeFeature "Tensile_LOGIC" tensileLogic)
+    (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion)
+    (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch)
+    (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib)
+    (lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat)
+    (lib.cmakeBool "Tensile_PRINT_DEBUG" true)
   ] ++ lib.optionals (buildTests || buildBenchmarks) [
-    "-DCMAKE_CXX_FLAGS=-I${amd-blis}/include/blis"
+    (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis")
   ];
 
-  postPatch = lib.optionalString (finalAttrs.pname != "rocblas") ''
-    # Return early and install tensile files manually
-    substituteInPlace library/src/CMakeLists.txt \
-      --replace "set_target_properties( TensileHost PROPERTIES OUTPUT_NAME" "return()''\nset_target_properties( TensileHost PROPERTIES OUTPUT_NAME"
-  '' + lib.optionalString (buildTensile && finalAttrs.pname == "rocblas") ''
-    # Link the prebuilt Tensile files
-    mkdir -p build/Tensile/library
-
-    for path in ${gfx80} ${gfx90} ${gfx94} ${gfx10} ${gfx11} ${fallbacks}; do
-      ln -s $path/lib/rocblas/library/* build/Tensile/library
-    done
-
-    unlink build/Tensile/library/TensileManifest.txt
-  '' + lib.optionalString buildTensile ''
-    # Tensile REALLY wants to write to the nix directory if we include it normally
-    cp -a ${tensile} tensile
-    chmod +w -R tensile
-
-    # Rewrap Tensile
-    substituteInPlace tensile/bin/{.t*,.T*,*} \
-      --replace "${tensile}" "/build/source/tensile"
-
-    substituteInPlace CMakeLists.txt \
-      --replace "include(virtualenv)" "" \
-      --replace "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" ""
-  '';
+  patches = [
+    (fetchpatch {
+      name = "Extend-rocBLAS-HIP-ISA-compatibility.patch";
+      url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch";
+      hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo=";
+    })
+  ];
 
-  postInstall = lib.optionalString (finalAttrs.pname == "rocblas") ''
-    ln -sf ${fallbacks}/lib/rocblas/library/TensileManifest.txt $out/lib/rocblas/library
-  '' + lib.optionalString (finalAttrs.pname != "rocblas") ''
-    mkdir -p $out/lib/rocblas/library
-    rm -rf $out/share
-  '' + lib.optionalString (finalAttrs.pname != "rocblas" && finalAttrs.pname != "rocblas-tensile-fallbacks") ''
-    rm Tensile/library/{TensileManifest.txt,*_fallback.dat}
-    mv Tensile/library/* $out/lib/rocblas/library
-  '' + lib.optionalString (finalAttrs.pname == "rocblas-tensile-fallbacks") ''
-    mv Tensile/library/{TensileManifest.txt,*_fallback.dat} $out/lib/rocblas/library
-  '' + lib.optionalString buildTests ''
-    mkdir -p $test/bin
-    cp -a $out/bin/* $test/bin
-    rm $test/bin/*-bench || true
-  '' + lib.optionalString buildBenchmarks ''
-    mkdir -p $benchmark/bin
-    cp -a $out/bin/* $benchmark/bin
-    rm $benchmark/bin/*-test || true
-  '' + lib.optionalString (buildTests || buildBenchmarks ) ''
-    rm -rf $out/bin
+  # Pass $NIX_BUILD_CORES to Tensile
+  postPatch = ''
+    substituteInPlace cmake/build-options.cmake \
+      --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"'
   '';
 
   passthru.updateScript = rocmUpdateScript {