about summary refs log tree commit diff
path: root/pkgs
diff options
context:
space:
mode:
author Kira Bruneau <kira.bruneau@pm.me> 2023-06-02 07:50:38 -0400
committer GitHub <noreply@github.com> 2023-06-02 07:50:38 -0400
commit 564e538d49bcd495238707502b4b6dcafdca9da1 (patch)
tree 97907b0d20357bb63636f0012ca3171ea0cd6e20 /pkgs
parent 2c8500e8a839f86ac276008c13a0ca5880e7ad36 (diff)
parent 9dc0b2f3b5fe19a978ef2d1c7e288da5b36b5404 (diff)
Merge pull request #230881 from kira-bruneau/rocfft
rocfft: split kernel compilation into separate derivations
Diffstat (limited to 'pkgs')
-rw-r--r-- pkgs/development/libraries/rocfft/default.nix 398
-rw-r--r-- pkgs/development/libraries/rocfft/device-install.patch 15
-rw-r--r-- pkgs/development/libraries/rocfft/split-kernel-compilation.patch 124
3 files changed, 360 insertions, 177 deletions
diff --git a/pkgs/development/libraries/rocfft/default.nix b/pkgs/development/libraries/rocfft/default.nix
index 535d27feff6f4..325de9151b4b1 100644
--- a/pkgs/development/libraries/rocfft/default.nix
+++ b/pkgs/development/libraries/rocfft/default.nix
@@ -1,199 +1,243 @@
-{ lib
+{ rocfft
+, lib
 , stdenv
 , fetchFromGitHub
 , rocmUpdateScript
-, runCommand
 , cmake
-, rocm-cmake
-, rocrand
 , hip
-, openmp
-, sqlite
 , python3
-, gtest
+, rocm-cmake
+, sqlite
 , boost
 , fftw
 , fftwFloat
-, buildTests ? false
-, buildBenchmarks ? false
+, gtest
+, openmp
+, rocrand
+# NOTE: Update the default GPU targets on every update
+, gpuTargets ? [
+  "gfx803"
+  "gfx900"
+  "gfx906"
+  "gfx908"
+  "gfx90a"
+  "gfx1030"
+  "gfx1100"
+  "gfx1102"
+]
 }:
 
 let
-  name-zero = "librocfft-device-0.so.0.1";
-  name-one = "librocfft-device-1.so.0.1";
-  name-two = "librocfft-device-2.so.0.1";
-  name-three = "librocfft-device-3.so.0.1";
-
-  # This is over 3GB, to allow hydra caching we separate it
-  rf = stdenv.mkDerivation (finalAttrs: {
-    pname = "rocfft";
-    version = "5.4.3";
-
-    outputs = [
-      "out"
-      "libzero"
-      "libone"
-      "libtwo"
-      "libthree"
-    ] ++ lib.optionals buildTests [
-      "test"
-    ] ++ lib.optionals buildBenchmarks [
-      "benchmark"
-    ];
-
-    src = fetchFromGitHub {
-      owner = "ROCmSoftwarePlatform";
-      repo = "rocFFT";
-      rev = "rocm-${finalAttrs.version}";
-      hash = "sha256-FsefE0B2hF5ZcHDB6TscwFeZ1NKFkWX7VDpEvvbDbOk=";
-    };
-
-    nativeBuildInputs = [
-      cmake
-      rocm-cmake
-      hip
-    ];
-
-    buildInputs = [
-      sqlite
-      python3
-    ] ++ lib.optionals buildTests [
-      gtest
-    ] ++ lib.optionals (buildTests || buildBenchmarks) [
-      rocrand
-      boost
-      fftw
-      fftwFloat
-      openmp
-    ];
-
-    propagatedBuildInputs = lib.optionals buildTests [
-      fftw
-      fftwFloat
-    ];
-
-    cmakeFlags = [
-      "-DCMAKE_C_COMPILER=hipcc"
-      "-DCMAKE_CXX_COMPILER=hipcc"
-      "-DUSE_HIP_CLANG=ON"
-      "-DSQLITE_USE_SYSTEM_PACKAGE=ON"
-      # Manually define CMAKE_INSTALL_<DIR>
-      # See: https://github.com/NixOS/nixpkgs/pull/197838
-      "-DCMAKE_INSTALL_BINDIR=bin"
-      "-DCMAKE_INSTALL_LIBDIR=lib"
-      "-DCMAKE_INSTALL_INCLUDEDIR=include"
-    ] ++ lib.optionals buildTests [
-      "-DBUILD_CLIENTS_TESTS=ON"
-    ] ++ lib.optionals buildBenchmarks [
-      "-DBUILD_CLIENTS_RIDER=ON"
-      "-DBUILD_CLIENTS_SAMPLES=ON"
-    ];
-
-    postInstall = ''
-      mv $out/lib/${name-zero} $libzero
-      mv $out/lib/${name-one} $libone
-      mv $out/lib/${name-two} $libtwo
-      mv $out/lib/${name-three} $libthree
-      ln -s $libzero $out/lib/${name-zero}
-      ln -s $libone $out/lib/${name-one}
-      ln -s $libtwo $out/lib/${name-two}
-      ln -s $libthree $out/lib/${name-three}
-    '' + lib.optionalString buildTests ''
-      mkdir -p $test/{bin,lib/fftw}
-      cp -a $out/bin/* $test/bin
-      ln -s ${fftw}/lib/libfftw*.so $test/lib/fftw
-      ln -s ${fftwFloat}/lib/libfftw*.so $test/lib/fftw
-      rm -r $out/lib/fftw
-      rm $test/bin/{rocfft_rtc_helper,*-rider} || true
-    '' + lib.optionalString buildBenchmarks ''
-      mkdir -p $benchmark/bin
-      cp -a $out/bin/* $benchmark/bin
-      rm $benchmark/bin/{rocfft_rtc_helper,*-test} || true
-    '' + lib.optionalString (buildTests || buildBenchmarks ) ''
-      mv $out/bin/rocfft_rtc_helper $out
-      rm -r $out/bin/*
-      mv $out/rocfft_rtc_helper $out/bin
+  # To avoid output limit exceeded errors in hydra, we build kernel
+  # device libs and the kernel RTC cache database in separate derivations
+  kernelDeviceLibs = map
+    (target:
+      (rocfft.overrideAttrs (prevAttrs: {
+        pname = "rocfft-device-${target}";
+
+        patches = prevAttrs.patches ++ [
+          # Add back install rule for device library
+          # This workaround is needed because rocm_install_targets
+          # doesn't support an EXCLUDE_FROM_ALL option
+          ./device-install.patch
+        ];
+
+        buildFlags = [ "rocfft-device-${target}" ];
+
+        installPhase = ''
+          runHook preInstall
+          cmake --install . --component device
+          runHook postInstall
+        '';
+
+        requiredSystemFeatures = [ "big-parallel" ];
+      })).override {
+        gpuTargets = [ target ];
+      }
+    )
+    gpuTargets;
+
+  # TODO: Figure out how to also split this by GPU target
+  #
+  # It'll be a bit more complicated than what we're doing for the kernel
+  # device libs, because the kernel cache needs to be compiled into
+  # one sqlite database (whereas the device libs can be linked into
+  # rocfft as separate libraries for each GPU target).
+  #
+  # It's not clear why this needs to even be a db in the first place.
+  # It would simplify things A LOT if we could just store these
+  # pre-compiled kernels as files (but that'd need a lot of patching).
+  kernelRtcCache = rocfft.overrideAttrs (_: {
+    pname = "rocfft-kernel-cache";
+
+    buildFlags = [ "rocfft_kernel_cache_target" ];
+
+    installPhase = ''
+      runHook preInstall
+      cmake --install . --component kernel_cache
+      runHook postInstall
     '';
 
-    passthru.updateScript = rocmUpdateScript {
-      name = finalAttrs.pname;
-      owner = finalAttrs.src.owner;
-      repo = finalAttrs.src.repo;
-    };
-
-    meta = with lib; {
-      description = "FFT implementation for ROCm ";
-      homepage = "https://github.com/ROCmSoftwarePlatform/rocFFT";
-      license = with licenses; [ mit ];
-      maintainers = teams.rocm.members;
-      platforms = platforms.linux;
-      broken = versions.minor finalAttrs.version != versions.minor hip.version;
-    };
+    requiredSystemFeatures = [ "big-parallel" ];
   });
+in
+stdenv.mkDerivation (finalAttrs: {
+  pname = "rocfft";
+  version = "5.4.3";
+
+  src = fetchFromGitHub {
+    owner = "ROCmSoftwarePlatform";
+    repo = "rocFFT";
+    rev = "rocm-${finalAttrs.version}";
+    hash = "sha256-FsefE0B2hF5ZcHDB6TscwFeZ1NKFkWX7VDpEvvbDbOk=";
+  };
+
+  patches = [
+    # Exclude kernel compilation & installation from "all" target,
+    # and split device libraries by GPU target
+    ./split-kernel-compilation.patch
+  ];
 
-  rf-zero = runCommand name-zero { preferLocalBuild = true; } ''
-    cp -a ${rf.libzero} $out
-  '';
+  nativeBuildInputs = [
+    cmake
+    hip
+    python3
+    rocm-cmake
+  ];
 
-  rf-one = runCommand name-one { preferLocalBuild = true; } ''
-    cp -a ${rf.libone} $out
-  '';
+  buildInputs = [
+    sqlite
+  ] ++ lib.optionals (finalAttrs.pname == "rocfft") kernelDeviceLibs;
+
+  cmakeFlags = [
+    "-DCMAKE_C_COMPILER=hipcc"
+    "-DCMAKE_CXX_COMPILER=hipcc"
+    "-DUSE_HIP_CLANG=ON"
+    "-DSQLITE_USE_SYSTEM_PACKAGE=ON"
+    # Manually define CMAKE_INSTALL_<DIR>
+    # See: https://github.com/NixOS/nixpkgs/pull/197838
+    "-DCMAKE_INSTALL_BINDIR=bin"
+    "-DCMAKE_INSTALL_LIBDIR=lib"
+    "-DCMAKE_INSTALL_INCLUDEDIR=include"
+    "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}"
+  ];
 
-  rf-two = runCommand name-two { preferLocalBuild = true; } ''
-    cp -a ${rf.libtwo} $out
+  postInstall = lib.optionalString (finalAttrs.pname == "rocfft") ''
+    ln -s ${kernelRtcCache}/lib/rocfft_kernel_cache.db "$out/lib"
   '';
 
-  rf-three = runCommand name-three { preferLocalBuild = true; } ''
-    cp -a ${rf.libthree} $out
-  '';
-in stdenv.mkDerivation {
-  inherit (rf) pname version src passthru meta;
-
-  outputs = [
-    "out"
-  ] ++ lib.optionals buildTests [
-    "test"
-  ] ++ lib.optionals buildBenchmarks [
-    "benchmark"
-  ];
+  passthru = {
+    test = stdenv.mkDerivation {
+      pname = "${finalAttrs.pname}-test";
+      inherit (finalAttrs) version src;
+
+      sourceRoot = "source/clients/tests";
+
+      nativeBuildInputs = [
+        cmake
+        hip
+        rocm-cmake
+      ];
+
+      buildInputs = [
+        boost
+        fftw
+        fftwFloat
+        finalAttrs.finalPackage
+        gtest
+        openmp
+        rocrand
+      ];
+
+      cmakeFlags = [
+        "-DCMAKE_C_COMPILER=hipcc"
+        "-DCMAKE_CXX_COMPILER=hipcc"
+      ];
+
+      postInstall = ''
+        rm -r "$out/lib/fftw"
+        rmdir "$out/lib"
+      '';
+    };
 
-  dontUnpack = true;
-  dontPatch = true;
-  dontConfigure = true;
-  dontBuild = true;
-
-  installPhase = ''
-    runHook preInstall
-
-    mkdir -p $out/lib
-    ln -sf ${rf-zero} $out/lib/${name-zero}
-    ln -sf ${rf-one} $out/lib/${name-one}
-    ln -sf ${rf-two} $out/lib/${name-two}
-    ln -sf ${rf-three} $out/lib/${name-three}
-    cp -an ${rf}/* $out
-  '' + lib.optionalString buildTests ''
-    cp -a ${rf.test} $test
-  '' + lib.optionalString buildBenchmarks ''
-    cp -a ${rf.benchmark} $benchmark
-  '' + ''
-    runHook postInstall
-  '';
+    benchmark = stdenv.mkDerivation {
+      pname = "${finalAttrs.pname}-benchmark";
+      inherit (finalAttrs) version src;
+
+      sourceRoot = "source/clients/rider";
+
+      nativeBuildInputs = [
+        cmake
+        hip
+        rocm-cmake
+      ];
+
+      buildInputs = [
+        boost
+        finalAttrs.finalPackage
+        openmp
+        (python3.withPackages (ps: with ps; [
+          pandas
+          scipy
+        ]))
+        rocrand
+      ];
+
+      cmakeFlags = [
+        "-DCMAKE_C_COMPILER=hipcc"
+        "-DCMAKE_CXX_COMPILER=hipcc"
+      ];
+
+      postInstall = ''
+        cp -a ../../../scripts/perf "$out/bin"
+      '';
+    };
 
-  # Fix paths
-  preFixup = ''
-    substituteInPlace $out/include/*.h $out/rocfft/include/*.h \
-      --replace "${rf}" "$out"
-
-    patchelf --set-rpath \
-      $(patchelf --print-rpath $out/lib/librocfft.so | sed 's,${rf}/lib,'"$out/lib"',') \
-      $out/lib/librocfft.so
-  '' + lib.optionalString buildTests ''
-    patchelf --set-rpath \
-      $(patchelf --print-rpath $test/bin/rocfft-test | sed 's,${rf}/lib,'"$out/lib"',') \
-      $test/bin/rocfft-test
-  '' + lib.optionalString buildBenchmarks ''
-    patchelf --set-rpath \
-      $(patchelf --print-rpath $benchmark/bin/rocfft-rider | sed 's,${rf}/lib,'"$out/lib"',') \
-      $benchmark/bin/rocfft-rider
-  '';
-}
+    samples = stdenv.mkDerivation {
+      pname = "${finalAttrs.pname}-samples";
+      inherit (finalAttrs) version src;
+
+      sourceRoot = "source/clients/samples";
+
+      nativeBuildInputs = [
+        cmake
+        hip
+        rocm-cmake
+      ];
+
+      buildInputs = [
+        boost
+        finalAttrs.finalPackage
+        openmp
+        rocrand
+      ];
+
+      cmakeFlags = [
+        "-DCMAKE_C_COMPILER=hipcc"
+        "-DCMAKE_CXX_COMPILER=hipcc"
+      ];
+
+      installPhase = ''
+        runHook preInstall
+        mkdir "$out"
+        cp -a bin "$out"
+        runHook postInstall
+      '';
+    };
+
+    updateScript = rocmUpdateScript {
+      name = finalAttrs.pname;
+      owner = finalAttrs.src.owner;
+      repo = finalAttrs.src.repo;
+    };
+  };
+
+  meta = with lib; {
+    description = "FFT implementation for ROCm";
+    homepage = "https://github.com/ROCmSoftwarePlatform/rocFFT";
+    license = with licenses; [ mit ];
+    maintainers = with maintainers; [ kira-bruneau ] ++ teams.rocm.members;
+    platforms = platforms.linux;
+    broken = versions.minor finalAttrs.version != versions.minor hip.version;
+  };
+})
diff --git a/pkgs/development/libraries/rocfft/device-install.patch b/pkgs/development/libraries/rocfft/device-install.patch
new file mode 100644
index 0000000000000..355cf30d07ff1
--- /dev/null
+++ b/pkgs/development/libraries/rocfft/device-install.patch
@@ -0,0 +1,15 @@
+diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt
+index 73a8ec9..9bfd4b8 100644
+--- a/library/src/device/CMakeLists.txt
++++ b/library/src/device/CMakeLists.txt
+@@ -255,4 +255,10 @@ foreach( sub ${AMDGPU_TARGETS} )
+   if( NOT BUILD_SHARED_LIBS )
+     set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" )
+   endif( )
++
++  rocm_install_targets(
++    TARGETS
++    rocfft-device-${sub}
++    COMPONENT device
++  )
+ endforeach()
diff --git a/pkgs/development/libraries/rocfft/split-kernel-compilation.patch b/pkgs/development/libraries/rocfft/split-kernel-compilation.patch
new file mode 100644
index 0000000000000..5d71fe399c1a5
--- /dev/null
+++ b/pkgs/development/libraries/rocfft/split-kernel-compilation.patch
@@ -0,0 +1,124 @@
+diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt
+index 3a16304..606b711 100644
+--- a/library/src/CMakeLists.txt
++++ b/library/src/CMakeLists.txt
+@@ -250,12 +250,12 @@ foreach( target
+ 
+ endforeach()
+ 
+-add_executable( rocfft_aot_helper
++add_executable( rocfft_aot_helper EXCLUDE_FROM_ALL
+   rocfft_aot_helper.cpp
+   rocfft_stub.cpp
+ )
+ 
+-add_executable( rocfft_config_search
++add_executable( rocfft_config_search EXCLUDE_FROM_ALL
+   rocfft_config_search.cpp
+   rocfft_stub.cpp
+ )
+@@ -279,10 +279,10 @@ endif()
+ 
+ target_link_libraries( rocfft PRIVATE ${ROCFFT_DEVICE_LINK_LIBS} )
+ 
+-target_link_libraries( rocfft PRIVATE rocfft-device-0 )
+-target_link_libraries( rocfft PRIVATE rocfft-device-1 )
+-target_link_libraries( rocfft PRIVATE rocfft-device-2 )
+-target_link_libraries( rocfft PRIVATE rocfft-device-3 )
++foreach( sub ${AMDGPU_TARGETS} )
++  target_link_libraries( rocfft PRIVATE -lrocfft-device-${sub} )
++endforeach()
++
+ foreach( target rocfft rocfft_aot_helper rocfft_config_search )
+   # RTC uses dladdr to find the RTC helper program
+   if( NOT WIN32 )
+@@ -347,7 +347,7 @@ add_custom_command(
+   DEPENDS rocfft_aot_helper rocfft_rtc_helper
+   COMMENT "Compile kernels into shipped cache file"
+ )
+-add_custom_target( rocfft_kernel_cache_target ALL
++add_custom_target( rocfft_kernel_cache_target
+   DEPENDS rocfft_kernel_cache.db
+   VERBATIM
+ )
+@@ -392,7 +392,8 @@ else()
+ endif()
+ rocm_install(FILES ${ROCFFT_KERNEL_CACHE_PATH}
+   DESTINATION "${ROCFFT_KERNEL_CACHE_INSTALL_DIR}"
+-  COMPONENT runtime
++  COMPONENT kernel_cache
++  EXCLUDE_FROM_ALL
+ )
+ 
+ #         PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
+diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt
+index 9f7b85f..73a8ec9 100644
+--- a/library/src/device/CMakeLists.txt
++++ b/library/src/device/CMakeLists.txt
+@@ -170,11 +170,11 @@ list( SORT rocfft_device_source )
+ # functions callable by rocFFT and depends on amdhip64, and another
+ # one usable by AOT RTC that contains no device code
+ list( FILTER rocfft_device_source EXCLUDE REGEX function_pool.cpp )
+-add_library( rocfft-function-pool OBJECT
++add_library( rocfft-function-pool OBJECT EXCLUDE_FROM_ALL
+   function_pool.cpp
+ )
+ target_compile_definitions( rocfft-function-pool PRIVATE FUNCTION_POOL_STANDALONE_BODY= )
+-add_library( rocfft-function-pool-standalone OBJECT
++add_library( rocfft-function-pool-standalone OBJECT EXCLUDE_FROM_ALL
+   function_pool.cpp
+ )
+ target_compile_definitions( rocfft-function-pool-standalone PRIVATE FUNCTION_POOL_STANDALONE_BODY={} )
+@@ -193,26 +193,15 @@ foreach( pool rocfft-function-pool rocfft-function-pool-standalone )
+   add_dependencies(${pool} gen_headers_target)
+ endforeach()
+ 
+-list( LENGTH rocfft_device_source rocfft_device_source_len )
+-math(EXPR split_len "${rocfft_device_source_len} / 4")
+-math(EXPR split_idx_2 "${rocfft_device_source_len} / 4 * 2")
+-math(EXPR split_idx_3 "${rocfft_device_source_len} / 4 * 3")
+-
+-list( SUBLIST rocfft_device_source 0 ${split_len} rocfft_device_source_0 )
+-list( SUBLIST rocfft_device_source ${split_len} ${split_len} rocfft_device_source_1 )
+-list( SUBLIST rocfft_device_source ${split_idx_2} ${split_len} rocfft_device_source_2 )
+-list( SUBLIST rocfft_device_source ${split_idx_3} -1 rocfft_device_source_3 )
+-
+-foreach( sub RANGE 3 )
+-  set( rocfft_device_source_var rocfft_device_source_${sub} )
++foreach( sub ${AMDGPU_TARGETS} )
+   if(NOT SINGLELIB)
+-    add_library( rocfft-device-${sub}
+-      ${${rocfft_device_source_var}} )
++    add_library( rocfft-device-${sub} EXCLUDE_FROM_ALL
++      ${rocfft_device_source} )
+   else()
+     # Compile the device lib as a static library, which is then linked
+     # into librocfft.so Useful for testing purposes.
+-    add_library( rocfft-device-${sub} STATIC
+-      ${${rocfft_device_source_var}} )
++    add_library( rocfft-device-${sub} STATIC EXCLUDE_FROM_ALL
++      ${rocfft_device_source} )
+ 
+     # if we're building singlelib, we don't want to export any of the
+     # device library symbols to the main library
+@@ -241,9 +230,7 @@ foreach( sub RANGE 3 )
+   # Set AMD GPU architecture options
+ 
+   # Enable compilation of desired architectures
+-  foreach( target ${AMDGPU_TARGETS} )
+-    target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${target} )
+-  endforeach( )
++  target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${sub} )
+ 
+   target_include_directories( rocfft-device-${sub}
+     PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+@@ -268,9 +255,4 @@ foreach( sub RANGE 3 )
+   if( NOT BUILD_SHARED_LIBS )
+     set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" )
+   endif( )
+-
+-  rocm_install_targets(
+-    TARGETS
+-    rocfft-device-${sub}
+-    )
+ endforeach()