path: root/pkgs/development/python-modules/vllm/default.nix
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  which,
  ninja,
  packaging,
  setuptools,
  torch,
  outlines,
  wheel,
  psutil,
  ray,
  pandas,
  pyarrow,
  sentencepiece,
  numpy,
  transformers,
  xformers,
  fastapi,
  uvicorn,
  pydantic,
  aioprometheus,
  pynvml,
  cupy,
  writeShellScript,

  config,

  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },

  rocmSupport ? config.rocmSupport,
  rocmPackages ? { },
  gpuTargets ? [ ],
}:
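
# This expression is wired up from pkgs/top-level/python-packages.nix via callPackage,
# roughly along these lines (illustrative sketch; the exact call site may differ):
#   vllm = callPackage ../development/python-modules/vllm { };
# with cudaSupport/rocmSupport defaulting to the values in the nixpkgs `config` set.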

buildPythonPackage rec {
  pname = "vllm";
  version = "0.3.3";
  format = "pyproject";

  src = fetchFromGitHub {
    owner = "vllm-project";
    repo = pname;
    rev = "v${version}";
    hash = "sha256-LU5pCPVv+Ws9dL8oWL1sJGzwQKI1IFk2A1I6TP9gXL4=";
  };

  # Otherwise the build tries to enumerate the ROCm gfx archs supported by the host,
  # which is not possible due to sandboxing.
  PYTORCH_ROCM_ARCH = lib.optionalString rocmSupport (
    lib.strings.concatStringsSep ";" rocmPackages.clr.gpuTargets
  );
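  # Illustrative example (actual values come from rocmPackages.clr.gpuTargets): for a
  # target list like [ "gfx906" "gfx908" "gfx90a" ] this evaluates to "gfx906;gfx908;gfx90a".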

  # The xformers 0.0.23.post1 GitHub release reports its version as 0.0.24.
  #
  # cupy-cuda12x is the same wheel as cupy, just built against the CUDA libraries; nixpkgs'
  # cupy is already set up that way. The version bump follows this upstream change:
  # https://github.com/vllm-project/vllm/pull/2845/commits/34a0ad7f9bb7880c0daa2992d700df3e01e91363
  #
  # `hipcc --version` works badly on NixOS due to unresolved paths, so it is replaced with a stub.
  postPatch =
    ''
      substituteInPlace requirements.txt \
        --replace "xformers == 0.0.23.post1" "xformers == 0.0.24"
      substituteInPlace requirements.txt \
        --replace "cupy-cuda12x == 12.1.0" "cupy == 12.3.0"
      substituteInPlace requirements-build.txt \
        --replace "torch==2.1.2" "torch == 2.2.1"
      substituteInPlace pyproject.toml \
        --replace "torch == 2.1.2" "torch == 2.2.1"
      substituteInPlace requirements.txt \
        --replace "torch == 2.1.2" "torch == 2.2.1"
    ''
    + lib.optionalString rocmSupport ''
      substituteInPlace setup.py \
        --replace "'hipcc', '--version'" "'${writeShellScript "hipcc-version-stub" "echo HIP version: 0.0"}'"
    '';
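  # The stub above only needs to satisfy setup.py's version probe: it prints
  # "HIP version: 0.0" instead of running the real `hipcc --version`, which misbehaves
  # on NixOS due to unresolved paths. The exact version string reported is assumed not
  # to matter for the build.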

  preBuild =
    lib.optionalString cudaSupport ''
      export CUDA_HOME=${cudaPackages.cuda_nvcc}
    ''
    + lib.optionalString rocmSupport ''
      export ROCM_HOME=${rocmPackages.clr}
      export PATH=$PATH:${rocmPackages.hipcc}
    '';

  nativeBuildInputs = [
    ninja
    packaging
    setuptools
    torch
    wheel
    which
  ] ++ lib.optionals rocmSupport [ rocmPackages.hipcc ];

  buildInputs =
    (lib.optionals cudaSupport (
      with cudaPackages;
      [
        cuda_cudart # cuda_runtime.h, -lcudart
        cuda_cccl # <thrust/*>
        libcusparse # cusparse.h
        libcublas # cublas_v2.h
        libcusolver # cusolverDn.h
      ]
    ))
    ++ (lib.optionals rocmSupport (
      with rocmPackages;
      [
        clr
        rocthrust
        rocprim
        hipsparse
        hipblas
      ]
    ));

  propagatedBuildInputs =
    [
      psutil
      ray
      pandas
      pyarrow
      sentencepiece
      numpy
      torch
      transformers
      outlines
      xformers
      fastapi
      uvicorn
      pydantic
      aioprometheus
    ]
    ++ uvicorn.optional-dependencies.standard
    ++ aioprometheus.optional-dependencies.starlette
    ++ lib.optionals cudaSupport [
      pynvml
      cupy
    ];

  pythonImportsCheck = [ "vllm" ];

  meta = with lib; {
    description = "High-throughput and memory-efficient inference and serving engine for LLMs";
    changelog = "https://github.com/vllm-project/vllm/releases/tag/v${version}";
    homepage = "https://github.com/vllm-project/vllm";
    license = licenses.asl20;
    maintainers = with maintainers; [
      happysalada
      lach
    ];
    broken = !cudaSupport && !rocmSupport;
  };
}