diff options
Diffstat (limited to 'pkgs/by-name/mi/mistral-rs/package.nix')
-rw-r--r-- | pkgs/by-name/mi/mistral-rs/package.nix | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/pkgs/by-name/mi/mistral-rs/package.nix b/pkgs/by-name/mi/mistral-rs/package.nix new file mode 100644 index 0000000000000..4c5324e8d856e --- /dev/null +++ b/pkgs/by-name/mi/mistral-rs/package.nix @@ -0,0 +1,201 @@ +{ + lib, + rustPlatform, + fetchFromGitHub, + + # nativeBuildInputs + pkg-config, + python3, + + # buildInputs + oniguruma, + openssl, + mkl, + stdenv, + darwin, + + # env + fetchurl, + + testers, + mistral-rs, + + cudaPackages, + cudaCapability ? null, + + config, + # one of `[ null false "cuda" "mkl" "metal" ]` + acceleration ? null, + +}: + +let + accelIsValid = builtins.elem acceleration [ + null + false + "cuda" + "mkl" + "metal" + ]; + + cudaSupport = + assert accelIsValid; + (acceleration == "cuda") || (config.cudaSupport && acceleration == null); + + minRequiredCudaCapability = "6.1"; # build fails with 6.0 + inherit (cudaPackages.cudaFlags) cudaCapabilities; + cudaCapabilityString = + if cudaCapability == null then + (builtins.head ( + (builtins.filter (cap: lib.versionAtLeast cap minRequiredCudaCapability) cudaCapabilities) + ++ [ + (lib.warn "mistral-rs doesn't support ${lib.concatStringsSep " " cudaCapabilities}" minRequiredCudaCapability) + ] + )) + else + cudaCapability; + cudaCapability' = lib.toInt (cudaPackages.cudaFlags.dropDot cudaCapabilityString); + + # TODO Should we assert mklAccel -> stdenv.isLinux && stdenv.isx86_64 ? + mklSupport = + assert accelIsValid; + (acceleration == "mkl"); + + metalSupport = + assert accelIsValid; + (acceleration == "metal") || (stdenv.isDarwin && stdenv.isAarch64 && (acceleration == null)); + + darwinBuildInputs = + with darwin.apple_sdk.frameworks; + [ + Accelerate + CoreVideo + CoreGraphics + ] + ++ lib.optionals metalSupport [ + MetalKit + MetalPerformanceShaders + ]; +in + +rustPlatform.buildRustPackage rec { + pname = "mistral-rs"; + version = "0.1.18"; + + src = fetchFromGitHub { + owner = "EricLBuehler"; + repo = "mistral.rs"; + rev = "refs/tags/v${version}"; + hash = "sha256-lMDFWNv9b0UfckqLmyWRVwnqmGe6nxYsUHzoi2+oG84="; + }; + + cargoLock = { + lockFile = ./Cargo.lock; + outputHashes = { + "candle-core-0.6.0" = "sha256-DxGBWf2H7MamrbboTJ4zHy1HeE8ZVT7QvE3sTYrRxBc="; + "range-checked-0.1.0" = "sha256-S+zcF13TjwQPFWZLIbUDkvEeaYdaxCOtDLtI+JRvum8="; + }; + }; + + postPatch = '' + ln -s ${./Cargo.lock} Cargo.lock + ''; + + nativeBuildInputs = [ + pkg-config + python3 + ] ++ lib.optionals cudaSupport [ cudaPackages.cuda_nvcc ]; + + buildInputs = + [ + oniguruma + openssl + ] + ++ lib.optionals cudaSupport [ + cudaPackages.cuda_nvrtc + cudaPackages.libcublas + cudaPackages.libcurand + ] + ++ lib.optionals mklSupport [ mkl ] + ++ lib.optionals stdenv.isDarwin darwinBuildInputs; + + cargoBuildFlags = + lib.optionals cudaSupport [ "--features=cuda" ] + ++ lib.optionals mklSupport [ "--features=mkl" ] + ++ lib.optionals (stdenv.isDarwin && metalSupport) [ "--features=metal" ]; + + env = + { + SWAGGER_UI_DOWNLOAD_URL = + let + # When updating: + # - Look for the version of `utopia-swagger-ui` at: + # https://github.com/EricLBuehler/mistral.rs/blob/v<MISTRAL-RS-VERSION>/mistralrs-server/Cargo.toml + # - Look at the corresponding version of `swagger-ui` at: + # https://github.com/juhaku/utoipa/blob/utoipa-swagger-ui-<UTOPIA-SWAGGER-UI-VERSION>/utoipa-swagger-ui/build.rs#L21-L22 + swaggerUiVersion = "5.17.12"; + + swaggerUi = fetchurl { + url = "https://github.com/swagger-api/swagger-ui/archive/refs/tags/v${swaggerUiVersion}.zip"; + hash = "sha256-HK4z/JI+1yq8BTBJveYXv9bpN/sXru7bn/8g5mf2B/I="; + }; + in + "file://${swaggerUi}"; + + RUSTONIG_SYSTEM_LIBONIG = true; + } + // (lib.optionalAttrs cudaSupport { + CUDA_COMPUTE_CAP = cudaCapability'; + + # Apparently, cudart is enough: No need to provide the entire cudaPackages.cudatoolkit derivation. + CUDA_TOOLKIT_ROOT_DIR = lib.getDev cudaPackages.cuda_cudart; + }); + + NVCC_PREPEND_FLAGS = lib.optionals cudaSupport [ + "-I${lib.getDev cudaPackages.cuda_cudart}/include" + "-I${lib.getDev cudaPackages.cuda_cccl}/include" + ]; + + # swagger-ui will once more be copied in the target directory during the check phase + # Not deleting the existing unpacked archive leads to a `PermissionDenied` error + preCheck = '' + rm -rf target/${stdenv.hostPlatform.config}/release/build/ + ''; + + # Try to access internet + checkFlags = [ + "--skip=gguf::gguf_tokenizer::tests::test_decode_gpt2" + "--skip=gguf::gguf_tokenizer::tests::test_decode_llama" + "--skip=gguf::gguf_tokenizer::tests::test_encode_gpt2" + "--skip=gguf::gguf_tokenizer::tests::test_encode_llama" + "--skip=sampler::tests::test_argmax" + "--skip=sampler::tests::test_gumbel_speculative" + ]; + + passthru = { + tests = { + version = testers.testVersion { package = mistral-rs; }; + + withMkl = mistral-rs.override { acceleration = "mkl"; }; + withCuda = mistral-rs.override { acceleration = "cuda"; }; + withMetal = mistral-rs.override { acceleration = "metal"; }; + }; + }; + + meta = { + description = "Blazingly fast LLM inference"; + homepage = "https://github.com/EricLBuehler/mistral.rs"; + changelog = "https://github.com/EricLBuehler/mistral.rs/releases/tag/v${version}"; + license = lib.licenses.mit; + maintainers = with lib.maintainers; [ GaetanLepage ]; + mainProgram = "mistralrs-server"; + platforms = + if cudaSupport then + lib.platforms.linux + else if metalSupport then + [ "aarch64-darwin" ] + else + lib.platforms.unix; + broken = mklSupport; + }; +} |