diff options
author | Kira Bruneau <kira.bruneau@pm.me> | 2023-06-02 07:50:38 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-02 07:50:38 -0400 |
commit | 564e538d49bcd495238707502b4b6dcafdca9da1 (patch) | |
tree | 97907b0d20357bb63636f0012ca3171ea0cd6e20 /pkgs | |
parent | 2c8500e8a839f86ac276008c13a0ca5880e7ad36 (diff) | |
parent | 9dc0b2f3b5fe19a978ef2d1c7e288da5b36b5404 (diff) |
Merge pull request #230881 from kira-bruneau/rocfft
rocfft: split kernel compilation into separate derivations
Diffstat (limited to 'pkgs')
3 files changed, 360 insertions, 177 deletions
diff --git a/pkgs/development/libraries/rocfft/default.nix b/pkgs/development/libraries/rocfft/default.nix index 535d27feff6f4..325de9151b4b1 100644 --- a/pkgs/development/libraries/rocfft/default.nix +++ b/pkgs/development/libraries/rocfft/default.nix @@ -1,199 +1,243 @@ -{ lib +{ rocfft +, lib , stdenv , fetchFromGitHub , rocmUpdateScript -, runCommand , cmake -, rocm-cmake -, rocrand , hip -, openmp -, sqlite , python3 -, gtest +, rocm-cmake +, sqlite , boost , fftw , fftwFloat -, buildTests ? false -, buildBenchmarks ? false +, gtest +, openmp +, rocrand +# NOTE: Update the default GPU targets on every update +, gpuTargets ? [ + "gfx803" + "gfx900" + "gfx906" + "gfx908" + "gfx90a" + "gfx1030" + "gfx1100" + "gfx1102" +] }: let - name-zero = "librocfft-device-0.so.0.1"; - name-one = "librocfft-device-1.so.0.1"; - name-two = "librocfft-device-2.so.0.1"; - name-three = "librocfft-device-3.so.0.1"; - - # This is over 3GB, to allow hydra caching we separate it - rf = stdenv.mkDerivation (finalAttrs: { - pname = "rocfft"; - version = "5.4.3"; - - outputs = [ - "out" - "libzero" - "libone" - "libtwo" - "libthree" - ] ++ lib.optionals buildTests [ - "test" - ] ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; - - src = fetchFromGitHub { - owner = "ROCmSoftwarePlatform"; - repo = "rocFFT"; - rev = "rocm-${finalAttrs.version}"; - hash = "sha256-FsefE0B2hF5ZcHDB6TscwFeZ1NKFkWX7VDpEvvbDbOk="; - }; - - nativeBuildInputs = [ - cmake - rocm-cmake - hip - ]; - - buildInputs = [ - sqlite - python3 - ] ++ lib.optionals buildTests [ - gtest - ] ++ lib.optionals (buildTests || buildBenchmarks) [ - rocrand - boost - fftw - fftwFloat - openmp - ]; - - propagatedBuildInputs = lib.optionals buildTests [ - fftw - fftwFloat - ]; - - cmakeFlags = [ - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - "-DUSE_HIP_CLANG=ON" - "-DSQLITE_USE_SYSTEM_PACKAGE=ON" - # Manually define CMAKE_INSTALL_<DIR> - # See: https://github.com/NixOS/nixpkgs/pull/197838 - "-DCMAKE_INSTALL_BINDIR=bin" - "-DCMAKE_INSTALL_LIBDIR=lib" - "-DCMAKE_INSTALL_INCLUDEDIR=include" - ] ++ lib.optionals buildTests [ - "-DBUILD_CLIENTS_TESTS=ON" - ] ++ lib.optionals buildBenchmarks [ - "-DBUILD_CLIENTS_RIDER=ON" - "-DBUILD_CLIENTS_SAMPLES=ON" - ]; - - postInstall = '' - mv $out/lib/${name-zero} $libzero - mv $out/lib/${name-one} $libone - mv $out/lib/${name-two} $libtwo - mv $out/lib/${name-three} $libthree - ln -s $libzero $out/lib/${name-zero} - ln -s $libone $out/lib/${name-one} - ln -s $libtwo $out/lib/${name-two} - ln -s $libthree $out/lib/${name-three} - '' + lib.optionalString buildTests '' - mkdir -p $test/{bin,lib/fftw} - cp -a $out/bin/* $test/bin - ln -s ${fftw}/lib/libfftw*.so $test/lib/fftw - ln -s ${fftwFloat}/lib/libfftw*.so $test/lib/fftw - rm -r $out/lib/fftw - rm $test/bin/{rocfft_rtc_helper,*-rider} || true - '' + lib.optionalString buildBenchmarks '' - mkdir -p $benchmark/bin - cp -a $out/bin/* $benchmark/bin - rm $benchmark/bin/{rocfft_rtc_helper,*-test} || true - '' + lib.optionalString (buildTests || buildBenchmarks ) '' - mv $out/bin/rocfft_rtc_helper $out - rm -r $out/bin/* - mv $out/rocfft_rtc_helper $out/bin + # To avoid output limit exceeded errors in hydra, we build kernel + # device libs and the kernel RTC cache database in separate derivations + kernelDeviceLibs = map + (target: + (rocfft.overrideAttrs (prevAttrs: { + pname = "rocfft-device-${target}"; + + patches = prevAttrs.patches ++ [ + # Add back install rule for device library + # This workaround is needed because rocm_install_targets + # doesn't support an EXCLUDE_FROM_ALL option + ./device-install.patch + ]; + + buildFlags = [ "rocfft-device-${target}" ]; + + installPhase = '' + runHook preInstall + cmake --install . --component device + runHook postInstall + ''; + + requiredSystemFeatures = [ "big-parallel" ]; + })).override { + gpuTargets = [ target ]; + } + ) + gpuTargets; + + # TODO: Figure out how to also split this by GPU target + # + # It'll be bit more complicated than what we're doing for the kernel + # device libs, because the kernel cache needs to be compiled into + # one sqlite database (whereas the device libs can be linked into + # rocfft as separate libraries for each GPU target). + # + # It's not clear why this needs to even be a db in the first place. + # It would simplify things A LOT if we could just store these + # pre-compiled kernels as files (but that'd need a lot of patching). + kernelRtcCache = rocfft.overrideAttrs (_: { + pname = "rocfft-kernel-cache"; + + buildFlags = [ "rocfft_kernel_cache_target" ]; + + installPhase = '' + runHook preInstall + cmake --install . --component kernel_cache + runHook postInstall ''; - passthru.updateScript = rocmUpdateScript { - name = finalAttrs.pname; - owner = finalAttrs.src.owner; - repo = finalAttrs.src.repo; - }; - - meta = with lib; { - description = "FFT implementation for ROCm "; - homepage = "https://github.com/ROCmSoftwarePlatform/rocFFT"; - license = with licenses; [ mit ]; - maintainers = teams.rocm.members; - platforms = platforms.linux; - broken = versions.minor finalAttrs.version != versions.minor hip.version; - }; + requiredSystemFeatures = [ "big-parallel" ]; }); +in +stdenv.mkDerivation (finalAttrs: { + pname = "rocfft"; + version = "5.4.3"; + + src = fetchFromGitHub { + owner = "ROCmSoftwarePlatform"; + repo = "rocFFT"; + rev = "rocm-${finalAttrs.version}"; + hash = "sha256-FsefE0B2hF5ZcHDB6TscwFeZ1NKFkWX7VDpEvvbDbOk="; + }; + + patches = [ + # Exclude kernel compilation & installation from "all" target, + # and split device libraries by GPU target + ./split-kernel-compilation.patch + ]; - rf-zero = runCommand name-zero { preferLocalBuild = true; } '' - cp -a ${rf.libzero} $out - ''; + nativeBuildInputs = [ + cmake + hip + python3 + rocm-cmake + ]; - rf-one = runCommand name-one { preferLocalBuild = true; } '' - cp -a ${rf.libone} $out - ''; + buildInputs = [ + sqlite + ] ++ lib.optionals (finalAttrs.pname == "rocfft") kernelDeviceLibs; + + cmakeFlags = [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + "-DUSE_HIP_CLANG=ON" + "-DSQLITE_USE_SYSTEM_PACKAGE=ON" + # Manually define CMAKE_INSTALL_<DIR> + # See: https://github.com/NixOS/nixpkgs/pull/197838 + "-DCMAKE_INSTALL_BINDIR=bin" + "-DCMAKE_INSTALL_LIBDIR=lib" + "-DCMAKE_INSTALL_INCLUDEDIR=include" + "-DAMDGPU_TARGETS=${lib.concatStringsSep ";" gpuTargets}" + ]; - rf-two = runCommand name-two { preferLocalBuild = true; } '' - cp -a ${rf.libtwo} $out + postInstall = lib.optionalString (finalAttrs.pname == "rocfft") '' + ln -s ${kernelRtcCache}/lib/rocfft_kernel_cache.db "$out/lib" ''; - rf-three = runCommand name-three { preferLocalBuild = true; } '' - cp -a ${rf.libthree} $out - ''; -in stdenv.mkDerivation { - inherit (rf) pname version src passthru meta; - - outputs = [ - "out" - ] ++ lib.optionals buildTests [ - "test" - ] ++ lib.optionals buildBenchmarks [ - "benchmark" - ]; + passthru = { + test = stdenv.mkDerivation { + pname = "${finalAttrs.pname}-test"; + inherit (finalAttrs) version src; + + sourceRoot = "source/clients/tests"; + + nativeBuildInputs = [ + cmake + hip + rocm-cmake + ]; + + buildInputs = [ + boost + fftw + fftwFloat + finalAttrs.finalPackage + gtest + openmp + rocrand + ]; + + cmakeFlags = [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + ]; + + postInstall = '' + rm -r "$out/lib/fftw" + rmdir "$out/lib" + ''; + }; - dontUnpack = true; - dontPatch = true; - dontConfigure = true; - dontBuild = true; - - installPhase = '' - runHook preInstall - - mkdir -p $out/lib - ln -sf ${rf-zero} $out/lib/${name-zero} - ln -sf ${rf-one} $out/lib/${name-one} - ln -sf ${rf-two} $out/lib/${name-two} - ln -sf ${rf-three} $out/lib/${name-three} - cp -an ${rf}/* $out - '' + lib.optionalString buildTests '' - cp -a ${rf.test} $test - '' + lib.optionalString buildBenchmarks '' - cp -a ${rf.benchmark} $benchmark - '' + '' - runHook postInstall - ''; + benchmark = stdenv.mkDerivation { + pname = "${finalAttrs.pname}-benchmark"; + inherit (finalAttrs) version src; + + sourceRoot = "source/clients/rider"; + + nativeBuildInputs = [ + cmake + hip + rocm-cmake + ]; + + buildInputs = [ + boost + finalAttrs.finalPackage + openmp + (python3.withPackages (ps: with ps; [ + pandas + scipy + ])) + rocrand + ]; + + cmakeFlags = [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + ]; + + postInstall = '' + cp -a ../../../scripts/perf "$out/bin" + ''; + }; - # Fix paths - preFixup = '' - substituteInPlace $out/include/*.h $out/rocfft/include/*.h \ - --replace "${rf}" "$out" - - patchelf --set-rpath \ - $(patchelf --print-rpath $out/lib/librocfft.so | sed 's,${rf}/lib,'"$out/lib"',') \ - $out/lib/librocfft.so - '' + lib.optionalString buildTests '' - patchelf --set-rpath \ - $(patchelf --print-rpath $test/bin/rocfft-test | sed 's,${rf}/lib,'"$out/lib"',') \ - $test/bin/rocfft-test - '' + lib.optionalString buildBenchmarks '' - patchelf --set-rpath \ - $(patchelf --print-rpath $benchmark/bin/rocfft-rider | sed 's,${rf}/lib,'"$out/lib"',') \ - $benchmark/bin/rocfft-rider - ''; -} + samples = stdenv.mkDerivation { + pname = "${finalAttrs.pname}-samples"; + inherit (finalAttrs) version src; + + sourceRoot = "source/clients/samples"; + + nativeBuildInputs = [ + cmake + hip + rocm-cmake + ]; + + buildInputs = [ + boost + finalAttrs.finalPackage + openmp + rocrand + ]; + + cmakeFlags = [ + "-DCMAKE_C_COMPILER=hipcc" + "-DCMAKE_CXX_COMPILER=hipcc" + ]; + + installPhase = '' + runHook preInstall + mkdir "$out" + cp -a bin "$out" + runHook postInstall + ''; + }; + + updateScript = rocmUpdateScript { + name = finalAttrs.pname; + owner = finalAttrs.src.owner; + repo = finalAttrs.src.repo; + }; + }; + + meta = with lib; { + description = "FFT implementation for ROCm"; + homepage = "https://github.com/ROCmSoftwarePlatform/rocFFT"; + license = with licenses; [ mit ]; + maintainers = with maintainers; [ kira-bruneau ] ++ teams.rocm.members; + platforms = platforms.linux; + broken = versions.minor finalAttrs.version != versions.minor hip.version; + }; +}) diff --git a/pkgs/development/libraries/rocfft/device-install.patch b/pkgs/development/libraries/rocfft/device-install.patch new file mode 100644 index 0000000000000..355cf30d07ff1 --- /dev/null +++ b/pkgs/development/libraries/rocfft/device-install.patch @@ -0,0 +1,15 @@ +diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt +index 73a8ec9..9bfd4b8 100644 +--- a/library/src/device/CMakeLists.txt ++++ b/library/src/device/CMakeLists.txt +@@ -255,4 +255,10 @@ foreach( sub ${AMDGPU_TARGETS} ) + if( NOT BUILD_SHARED_LIBS ) + set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" ) + endif( ) ++ ++ rocm_install_targets( ++ TARGETS ++ rocfft-device-${sub} ++ COMPONENT device ++ ) + endforeach() diff --git a/pkgs/development/libraries/rocfft/split-kernel-compilation.patch b/pkgs/development/libraries/rocfft/split-kernel-compilation.patch new file mode 100644 index 0000000000000..5d71fe399c1a5 --- /dev/null +++ b/pkgs/development/libraries/rocfft/split-kernel-compilation.patch @@ -0,0 +1,124 @@ +diff --git a/library/src/CMakeLists.txt b/library/src/CMakeLists.txt +index 3a16304..606b711 100644 +--- a/library/src/CMakeLists.txt ++++ b/library/src/CMakeLists.txt +@@ -250,12 +250,12 @@ foreach( target + + endforeach() + +-add_executable( rocfft_aot_helper ++add_executable( rocfft_aot_helper EXCLUDE_FROM_ALL + rocfft_aot_helper.cpp + rocfft_stub.cpp + ) + +-add_executable( rocfft_config_search ++add_executable( rocfft_config_search EXCLUDE_FROM_ALL + rocfft_config_search.cpp + rocfft_stub.cpp + ) +@@ -279,10 +279,10 @@ endif() + + target_link_libraries( rocfft PRIVATE ${ROCFFT_DEVICE_LINK_LIBS} ) + +-target_link_libraries( rocfft PRIVATE rocfft-device-0 ) +-target_link_libraries( rocfft PRIVATE rocfft-device-1 ) +-target_link_libraries( rocfft PRIVATE rocfft-device-2 ) +-target_link_libraries( rocfft PRIVATE rocfft-device-3 ) ++foreach( sub ${AMDGPU_TARGETS} ) ++ target_link_libraries( rocfft PRIVATE -lrocfft-device-${sub} ) ++endforeach() ++ + foreach( target rocfft rocfft_aot_helper rocfft_config_search ) + # RTC uses dladdr to find the RTC helper program + if( NOT WIN32 ) +@@ -347,7 +347,7 @@ add_custom_command( + DEPENDS rocfft_aot_helper rocfft_rtc_helper + COMMENT "Compile kernels into shipped cache file" + ) +-add_custom_target( rocfft_kernel_cache_target ALL ++add_custom_target( rocfft_kernel_cache_target + DEPENDS rocfft_kernel_cache.db + VERBATIM + ) +@@ -392,7 +392,8 @@ else() + endif() + rocm_install(FILES ${ROCFFT_KERNEL_CACHE_PATH} + DESTINATION "${ROCFFT_KERNEL_CACHE_INSTALL_DIR}" +- COMPONENT runtime ++ COMPONENT kernel_cache ++ EXCLUDE_FROM_ALL + ) + + # PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ +diff --git a/library/src/device/CMakeLists.txt b/library/src/device/CMakeLists.txt +index 9f7b85f..73a8ec9 100644 +--- a/library/src/device/CMakeLists.txt ++++ b/library/src/device/CMakeLists.txt +@@ -170,11 +170,11 @@ list( SORT rocfft_device_source ) + # functions callable by rocFFT and depends on amdhip64, and another + # one usable by AOT RTC that contains no device code + list( FILTER rocfft_device_source EXCLUDE REGEX function_pool.cpp ) +-add_library( rocfft-function-pool OBJECT ++add_library( rocfft-function-pool OBJECT EXCLUDE_FROM_ALL + function_pool.cpp + ) + target_compile_definitions( rocfft-function-pool PRIVATE FUNCTION_POOL_STANDALONE_BODY= ) +-add_library( rocfft-function-pool-standalone OBJECT ++add_library( rocfft-function-pool-standalone OBJECT EXCLUDE_FROM_ALL + function_pool.cpp + ) + target_compile_definitions( rocfft-function-pool-standalone PRIVATE FUNCTION_POOL_STANDALONE_BODY={} ) +@@ -193,26 +193,15 @@ foreach( pool rocfft-function-pool rocfft-function-pool-standalone ) + add_dependencies(${pool} gen_headers_target) + endforeach() + +-list( LENGTH rocfft_device_source rocfft_device_source_len ) +-math(EXPR split_len "${rocfft_device_source_len} / 4") +-math(EXPR split_idx_2 "${rocfft_device_source_len} / 4 * 2") +-math(EXPR split_idx_3 "${rocfft_device_source_len} / 4 * 3") +- +-list( SUBLIST rocfft_device_source 0 ${split_len} rocfft_device_source_0 ) +-list( SUBLIST rocfft_device_source ${split_len} ${split_len} rocfft_device_source_1 ) +-list( SUBLIST rocfft_device_source ${split_idx_2} ${split_len} rocfft_device_source_2 ) +-list( SUBLIST rocfft_device_source ${split_idx_3} -1 rocfft_device_source_3 ) +- +-foreach( sub RANGE 3 ) +- set( rocfft_device_source_var rocfft_device_source_${sub} ) ++foreach( sub ${AMDGPU_TARGETS} ) + if(NOT SINGLELIB) +- add_library( rocfft-device-${sub} +- ${${rocfft_device_source_var}} ) ++ add_library( rocfft-device-${sub} EXCLUDE_FROM_ALL ++ ${rocfft_device_source} ) + else() + # Compile the device lib as a static library, which is then linked + # into librocfft.so Useful for testing purposes. +- add_library( rocfft-device-${sub} STATIC +- ${${rocfft_device_source_var}} ) ++ add_library( rocfft-device-${sub} STATIC EXCLUDE_FROM_ALL ++ ${rocfft_device_source} ) + + # if we're building singlelib, we don't want to export any of the + # device library symbols to the main library +@@ -241,9 +230,7 @@ foreach( sub RANGE 3 ) + # Set AMD GPU architecture options + + # Enable compilation of desired architectures +- foreach( target ${AMDGPU_TARGETS} ) +- target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${target} ) +- endforeach( ) ++ target_compile_options( rocfft-device-${sub} PRIVATE --offload-arch=${sub} ) + + target_include_directories( rocfft-device-${sub} + PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> +@@ -268,9 +255,4 @@ foreach( sub RANGE 3 ) + if( NOT BUILD_SHARED_LIBS ) + set_target_properties( rocfft-device-${sub} PROPERTIES PREFIX "lib" ) + endif( ) +- +- rocm_install_targets( +- TARGETS +- rocfft-device-${sub} +- ) + endforeach() |