Diffstat (limited to 'pkgs/development/python-modules/vllm/default.nix')
-rw-r--r--  pkgs/development/python-modules/vllm/default.nix | 142
1 file changed, 83 insertions(+), 59 deletions(-)
diff --git a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix
index 2418f97452db8..3b9e6ee434871 100644
--- a/pkgs/development/python-modules/vllm/default.nix
+++ b/pkgs/development/python-modules/vllm/default.nix
@@ -1,9 +1,13 @@
 {
   lib,
+  stdenv,
+  python,
   buildPythonPackage,
+  pythonRelaxDepsHook,
   fetchFromGitHub,
   which,
   ninja,
+  cmake,
   packaging,
   setuptools,
   torch,
@@ -22,6 +26,13 @@
   pydantic,
   aioprometheus,
   pynvml,
+  openai,
+  pyzmq,
+  tiktoken,
+  torchvision,
+  py-cpuinfo,
+  lm-format-enforcer,
+  prometheus-fastapi-instrumentator,
   cupy,
   writeShellScript,
 
@@ -30,80 +41,73 @@
   cudaSupport ? config.cudaSupport,
   cudaPackages ? { },
 
-  rocmSupport ? config.rocmSupport,
+  # Has to be either ROCm or CUDA; default to the free one (ROCm)
+  rocmSupport ? !config.cudaSupport,
   rocmPackages ? { },
   gpuTargets ? [ ],
-}:
+}@args:
+
+let
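+  # CUTLASS is normally pulled in at build time by CMake's FetchContent,
+  # which cannot download anything inside the Nix sandbox; pre-fetch it
+  # here and point FETCHCONTENT_SOURCE_DIR_CUTLASS at it (see cmakeFlags
+  # below).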
+  cutlass = fetchFromGitHub {
+    owner = "NVIDIA";
+    repo = "cutlass";
+    rev = "refs/tags/v3.5.0";
+    sha256 = "sha256-D/s7eYsa5l/mfx73tE4mnFcTQdYqGmXa9d9TCryw4e4=";
+  };
+in
 
 buildPythonPackage rec {
   pname = "vllm";
-  version = "0.3.3";
-  format = "pyproject";
+  version = "0.5.3.post1";
+  pyproject = true;
+
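+  # With CUDA enabled, compile with cudaPackages.backendStdenv, whose
+  # host compiler is one that nvcc actually supports.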
+  stdenv = if cudaSupport then cudaPackages.backendStdenv else args.stdenv;
 
   src = fetchFromGitHub {
     owner = "vllm-project";
     repo = pname;
-    rev = "v${version}";
-    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
+    rev = "refs/tags/v${version}";
+    hash = "sha256-++DK2Y2zz+1KrEcdQc5XFrSjc7fCwMD2DQ/RqY7PoFU=";
   };
 
-  # Otherwise it tries to enumerate host supported ROCM gfx archs, and that is not possible due to sandboxing.
-  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
-    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
-  );
-
-  # xformers 0.0.23.post1 github release specifies its version as 0.0.24
-  #
-  # cupy-cuda12x is the same wheel as cupy, but built with cuda dependencies, we already have it set up
-  # like that in nixpkgs. Version upgrade is due to upstream shenanigans
-  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
-  #
-  # hipcc --version works badly on NixOS due to unresolved paths.
-  postPatch =
-    ''
-      substituteInPlace requirements.txt \
-        --replace "xformers == 0.0.23.post1" "xformers == 0.0.24"
-      substituteInPlace requirements.txt \
-        --replace "cupy-cuda12x == 12.1.0" "cupy == 12.3.0"
-      substituteInPlace requirements-build.txt \
-        --replace "torch==2.1.2" "torch == 2.2.1"
-      substituteInPlace pyproject.toml \
-        --replace "torch == 2.1.2" "torch == 2.2.1"
-      substituteInPlace requirements.txt \
-        --replace "torch == 2.1.2" "torch == 2.2.1"
-    ''
-    + lib.optionalString rocmSupport ''
-      substituteInPlace setup.py \
-        --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
-    '';
-
-  preBuild =
-    lib.optionalString cudaSupport ''
-      export CUDA_HOME=${cudaPackages.cuda_nvcc}
-    ''
-    + lib.optionalString rocmSupport ''
-      export ROCM_HOME=${rocmPackages.clr}
-      export PATH=$PATH:${rocmPackages.hipcc}
-    '';
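+  # hipcc --version works badly on NixOS due to unresolved paths, hence
+  # the first patch; the second makes setup.py honor the cmakeFlags set
+  # below.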
+  patches = [
+    ./0001-setup.py-don-t-ask-for-hipcc-version.patch
+    ./0002-setup.py-nix-support-respect-cmakeFlags.patch
+  ];
+
+  # Ignore the python version check because it hard-codes minor versions and
+  # lags behind `ray`'s python interpreter support
+  postPatch = ''
+    substituteInPlace CMakeLists.txt \
+      --replace-fail \
+        'set(PYTHON_SUPPORTED_VERSIONS' \
+        'set(PYTHON_SUPPORTED_VERSIONS "${lib.versions.majorMinor python.version}"'
+  '';
 
   nativeBuildInputs = [
+    cmake
     ninja
+    pythonRelaxDepsHook
+    which
+  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];
+
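+  # PEP 517 build requirements; run-time dependencies are listed in
+  # `dependencies` below.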
+  build-system = [
     packaging
     setuptools
-    torch
     wheel
-    which
-  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];
+  ];
 
   buildInputs =
     (lib.optionals cudaSupport (
       with cudaPackages;
       [
         cuda_cudart # cuda_runtime.h, -lcudart
-        cuda_cccl # <thrust/*>
+        cuda_cccl
         libcusparse # cusparse.h
-        libcublas # cublas_v2.h
         libcusolver # cusolverDn.h
+        cuda_nvcc
+        cuda_nvtx
+        libcublas
       ]
     ))
     ++ (lib.optionals rocmSupport (
@@ -117,30 +121,50 @@ buildPythonPackage rec {
       ]
     ));
 
-  propagatedBuildInputs =
+  dependencies =
     [
-      psutil
-      ray
+      aioprometheus
+      fastapi
+      lm-format-enforcer
+      numpy
+      openai
+      outlines
       pandas
+      prometheus-fastapi-instrumentator
+      psutil
+      py-cpuinfo
       pyarrow
+      pydantic
+      pyzmq
+      ray
       sentencepiece
-      numpy
+      tiktoken
       torch
+      torchvision
       transformers
-      outlines
-      xformers
-      fastapi
       uvicorn
-      pydantic
-      aioprometheus
+      xformers
     ]
     ++ uvicorn.optional-dependencies.standard
     ++ aioprometheus.optional-dependencies.starlette
     ++ lib.optionals cudaSupport [
-      pynvml
       cupy
+      pynvml
     ];
 
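+  # setup.py invokes CMake itself (and, with the patch above, honors
+  # cmakeFlags), so skip the cmake hook's own configure phase.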
+  dontUseCmakeConfigure = true;
+  cmakeFlags = [ (lib.cmakeFeature "FETCHCONTENT_SOURCE_DIR_CUTLASS" "${lib.getDev cutlass}") ];
+
+  env =
+    lib.optionalAttrs cudaSupport { CUDA_HOME = "${lib.getDev cudaPackages.cuda_nvcc}"; }
+    // lib.optionalAttrs rocmSupport {
+      # Otherwise the build tries to enumerate the host's supported ROCm gfx archs, which the sandbox makes impossible.
+      PYTORCH_ROCM_ARCH = lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets;
+      ROCM_HOME = "${rocmPackages.clr}";
+    };
+
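+  # Upstream pins exact dependency versions; relax them all so the
+  # versions packaged in nixpkgs are accepted.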
+  pythonRelaxDeps = true;
+
   pythonImportsCheck = [ "vllm" ];
 
   meta = with lib; {