diff options
Diffstat (limited to 'pkgs/development/rocm-modules/6/rocblas/default.nix')
-rw-r--r-- | pkgs/development/rocm-modules/6/rocblas/default.nix | 170 |
1 files changed, 51 insertions, 119 deletions
diff --git a/pkgs/development/rocm-modules/6/rocblas/default.nix b/pkgs/development/rocm-modules/6/rocblas/default.nix index 296167bb6f287..f93cceddd68af 100644 --- a/pkgs/development/rocm-modules/6/rocblas/default.nix +++ b/pkgs/development/rocm-modules/6/rocblas/default.nix @@ -1,7 +1,7 @@ -{ rocblas -, lib +{ lib , stdenv , fetchFromGitHub +, fetchpatch , rocmUpdateScript , runCommand , cmake @@ -21,57 +21,26 @@ , buildBenchmarks ? false , tensileLogic ? "asm_full" , tensileCOVersion ? "default" -, tensileSepArch ? true -, tensileLazyLib ? true +# https://github.com/ROCm/Tensile/issues/1757 +# Allows gfx101* users to use rocBLAS normally. +# Turn the below two values to `true` after the fix has been cherry-picked +# into a release. Just backporting that single fix is not enough because it +# depends on some previous commits. +, tensileSepArch ? false +, tensileLazyLib ? false , tensileLibFormat ? "msgpack" -, gpuTargets ? [ "all" ] +# `gfx940`, `gfx941` are not present in this list because they are early +# engineering samples, and all final MI300 hardware are `gfx942`: +# https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2032791130 +# +# `gfx1012` is not present in this list because the ISA compatibility patches +# would force all `gfx101*` GPUs to run as `gfx1010`, so `gfx101*` GPUs will +# always try to use `gfx1010` code objects, hence building for `gfx1012` is +# useless: https://github.com/NixOS/nixpkgs/pull/298388#issuecomment-2076327152 +, gpuTargets ? [ "gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx942;gfx1010;gfx1030;gfx1100;gfx1101;gfx1102" ] }: -let - # NOTE: Update the default GPU targets on every update - gfx80 = (rocblas.override { - gpuTargets = [ - "gfx803" - ]; - }).overrideAttrs { pname = "rocblas-tensile-gfx80"; }; - - gfx90 = (rocblas.override { - gpuTargets = [ - "gfx900" - "gfx906:xnack-" - "gfx908:xnack-" - "gfx90a:xnack+" - "gfx90a:xnack-" - ]; - }).overrideAttrs { pname = "rocblas-tensile-gfx90"; }; - - gfx94 = (rocblas.override { - gpuTargets = [ - "gfx940" - "gfx941" - "gfx942" - ]; - }).overrideAttrs { pname = "rocblas-tensile-gfx94"; }; - - gfx10 = (rocblas.override { - gpuTargets = [ - "gfx1010" - "gfx1012" - "gfx1030" - ]; - }).overrideAttrs { pname = "rocblas-tensile-gfx10"; }; - - gfx11 = (rocblas.override { - gpuTargets = [ - "gfx1100" - "gfx1101" - "gfx1102" - ]; - }).overrideAttrs { pname = "rocblas-tensile-gfx11"; }; - - # Unfortunately, we have to do two full builds, otherwise we get overlapping _fallback.dat files - fallbacks = rocblas.overrideAttrs { pname = "rocblas-tensile-fallbacks"; }; -in stdenv.mkDerivation (finalAttrs: { +stdenv.mkDerivation (finalAttrs: { pname = "rocblas"; version = "6.0.2"; @@ -94,6 +63,8 @@ in stdenv.mkDerivation (finalAttrs: { cmake rocm-cmake clr + ] ++ lib.optionals buildTensile [ + tensile ]; buildInputs = [ @@ -114,80 +85,41 @@ in stdenv.mkDerivation (finalAttrs: { ]; cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - "-Dpython=python3" - "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" - "-DBUILD_WITH_TENSILE=${if buildTensile then "ON" else "OFF"}" - # Manually define CMAKE_INSTALL_<DIR> - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" + (lib.cmakeFeature "CMAKE_C_COMPILER" "hipcc") + (lib.cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + (lib.cmakeFeature "python" "python3") + (lib.cmakeFeature "AMDGPU_TARGETS" (lib.concatStringsSep ";" gpuTargets)) + (lib.cmakeBool "BUILD_WITH_TENSILE" buildTensile) + (lib.cmakeBool "ROCM_SYMLINK_LIBS" false) + (lib.cmakeFeature "ROCBLAS_TENSILE_LIBRARY_DIR" "lib/rocblas") + (lib.cmakeBool "BUILD_CLIENTS_TESTS" buildTests) + (lib.cmakeBool "BUILD_CLIENTS_BENCHMARKS" buildBenchmarks) + # rocblas header files are not installed unless we set this + (lib.cmakeFeature "CMAKE_INSTALL_INCLUDEDIR" "include") ] ++ lib.optionals buildTensile [ - "-DVIRTUALENV_HOME_DIR=/build/source/tensile" - "-DTensile_TEST_LOCAL_PATH=/build/source/tensile" - "-DTensile_ROOT=/build/source/tensile/${python3.sitePackages}/Tensile" - "-DTensile_LOGIC=${tensileLogic}" - "-DTensile_CODE_OBJECT_VERSION=${tensileCOVersion}" - "-DTensile_SEPARATE_ARCHITECTURES=${if tensileSepArch then "ON" else "OFF"}" - "-DTensile_LAZY_LIBRARY_LOADING=${if tensileLazyLib then "ON" else "OFF"}" - "-DTensile_LIBRARY_FORMAT=${tensileLibFormat}" - ] ++ lib.optionals buildTests [ - "-DBUILD_CLIENTS_TESTS=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_CLIENTS_BENCHMARKS=ON" + (lib.cmakeBool "BUILD_WITH_PIP" false) + (lib.cmakeFeature "Tensile_LOGIC" tensileLogic) + (lib.cmakeFeature "Tensile_CODE_OBJECT_VERSION" tensileCOVersion) + (lib.cmakeBool "Tensile_SEPARATE_ARCHITECTURES" tensileSepArch) + (lib.cmakeBool "Tensile_LAZY_LIBRARY_LOADING" tensileLazyLib) + (lib.cmakeFeature "Tensile_LIBRARY_FORMAT" tensileLibFormat) + (lib.cmakeBool "Tensile_PRINT_DEBUG" true) ] ++ lib.optionals (buildTests || buildBenchmarks) [ - "-DCMAKE_CXX_FLAGS=-I${amd-blis}/include/blis" + (lib.cmakeFeature "CMAKE_CXX_FLAGS" "-I${amd-blis}/include/blis") ]; - postPatch = lib.optionalString (finalAttrs.pname != "rocblas") '' - # Return early and install tensile files manually - substituteInPlace library/src/CMakeLists.txt \ - --replace "set_target_properties( TensileHost PROPERTIES OUTPUT_NAME" "return()''\nset_target_properties( TensileHost PROPERTIES OUTPUT_NAME" - '' + lib.optionalString (buildTensile && finalAttrs.pname == "rocblas") '' - # Link the prebuilt Tensile files - mkdir -p build/Tensile/library - - for path in ${gfx80} ${gfx90} ${gfx94} ${gfx10} ${gfx11} ${fallbacks}; do - ln -s $path/lib/rocblas/library/* build/Tensile/library - done - - unlink build/Tensile/library/TensileManifest.txt - '' + lib.optionalString buildTensile '' - # Tensile REALLY wants to write to the nix directory if we include it normally - cp -a ${tensile} tensile - chmod +w -R tensile - - # Rewrap Tensile - substituteInPlace tensile/bin/{.t*,.T*,*} \ - --replace "${tensile}" "/build/source/tensile" - - substituteInPlace CMakeLists.txt \ - --replace "include(virtualenv)" "" \ - --replace "virtualenv_install(\''${Tensile_TEST_LOCAL_PATH})" "" - ''; + patches = [ + (fetchpatch { + name = "Extend-rocBLAS-HIP-ISA-compatibility.patch"; + url = "https://github.com/GZGavinZhao/rocBLAS/commit/89b75ff9cc731f71f370fad90517395e117b03bb.patch"; + hash = "sha256-W/ohOOyNCcYYLOiQlPzsrTlNtCBdJpKVxO8s+4G7sjo="; + }) + ]; - postInstall = lib.optionalString (finalAttrs.pname == "rocblas") '' - ln -sf ${fallbacks}/lib/rocblas/library/TensileManifest.txt $out/lib/rocblas/library - '' + lib.optionalString (finalAttrs.pname != "rocblas") '' - mkdir -p $out/lib/rocblas/library - rm -rf $out/share - '' + lib.optionalString (finalAttrs.pname != "rocblas" && finalAttrs.pname != "rocblas-tensile-fallbacks") '' - rm Tensile/library/{TensileManifest.txt,*_fallback.dat} - mv Tensile/library/* $out/lib/rocblas/library - '' + lib.optionalString (finalAttrs.pname == "rocblas-tensile-fallbacks") '' - mv Tensile/library/{TensileManifest.txt,*_fallback.dat} $out/lib/rocblas/library - '' + lib.optionalString buildTests '' - mkdir -p $test/bin - cp -a $out/bin/* $test/bin - rm $test/bin/*-bench || true - '' + lib.optionalString buildBenchmarks '' - mkdir -p $benchmark/bin - cp -a $out/bin/* $benchmark/bin - rm $benchmark/bin/*-test || true - '' + lib.optionalString (buildTests || buildBenchmarks ) '' - rm -rf $out/bin + # Pass $NIX_BUILD_CORES to Tensile + postPatch = '' + substituteInPlace cmake/build-options.cmake \ + --replace-fail 'Tensile_CPU_THREADS ""' 'Tensile_CPU_THREADS "$ENV{NIX_BUILD_CORES}"' ''; passthru.updateScript = rocmUpdateScript { |