about summary refs log tree commit diff
path: root/pkgs/by-name/ol/ollama/package.nix
diff options
context:
space:
mode:
Diffstat (limited to 'pkgs/by-name/ol/ollama/package.nix')
-rw-r--r-- pkgs/by-name/ol/ollama/package.nix 347
1 files changed, 183 insertions, 164 deletions
diff --git a/pkgs/by-name/ol/ollama/package.nix b/pkgs/by-name/ol/ollama/package.nix
index 93c1198c5b0ea..87a9a0af891a2 100644
--- a/pkgs/by-name/ol/ollama/package.nix
+++ b/pkgs/by-name/ol/ollama/package.nix
@@ -1,81 +1,88 @@
-{ lib
-, buildGo122Module
-, fetchFromGitHub
-, fetchpatch
-, buildEnv
-, linkFarm
-, overrideCC
-, makeWrapper
-, stdenv
-, addDriverRunpath
-
-, cmake
-, gcc12
-, clblast
-, libdrm
-, rocmPackages
-, cudaPackages
-, darwin
-, autoAddDriverRunpath
-
-, nixosTests
-, testers
-, ollama
-, ollama-rocm
-, ollama-cuda
-
-, config
+{
+  lib,
+  buildGoModule,
+  fetchFromGitHub,
+  fetchpatch,
+  buildEnv,
+  linkFarm,
+  overrideCC,
+  makeWrapper,
+  stdenv,
+  addDriverRunpath,
+
+  cmake,
+  gcc12,
+  clblast,
+  libdrm,
+  rocmPackages,
+  cudaPackages,
+  darwin,
+  autoAddDriverRunpath,
+
+  nixosTests,
+  testers,
+  ollama,
+  ollama-rocm,
+  ollama-cuda,
+
+  config,
   # one of `[ null false "rocm" "cuda" ]`
-, acceleration ? null
+  acceleration ? null,
 }:
 
+assert builtins.elem acceleration [
+  null
+  false
+  "rocm"
+  "cuda"
+];
+
 let
   pname = "ollama";
   # don't forget to invalidate all hashes each update
-  version = "0.1.48";
+  version = "0.3.5";
 
   src = fetchFromGitHub {
     owner = "ollama";
     repo = "ollama";
     rev = "v${version}";
-    hash = "sha256-rMStHUFC88TXIH/1c9bCOU0csnEZHOhWKBlLKarmCmE=";
+    hash = "sha256-2lPOkpZ9AmgDFoIHKi+Im1AwXnTxSY3LLtyui1ep3Dw=";
     fetchSubmodules = true;
   };
 
-  vendorHash = "sha256-LNH3mpxIrPMe5emfum1W10jvXIjKC6GkGcjq1HhpJQo=";
+  vendorHash = "sha256-hSxcREAujhvzHVNwnRTfhi0MKI3s8HNavER2VLz6SYk=";
 
   # ollama's patches of llama.cpp's example server
   # `ollama/llm/generate/gen_common.sh` -> "apply temporary patches until fix is upstream"
   # each update, these patches should be synchronized with the contents of `ollama/llm/patches/`
   llamacppPatches = [
-    (preparePatch "01-load-progress.diff" "sha256-K4GryCH/1cl01cyxaMLX3m4mTE79UoGwLMMBUgov+ew=")
+    (preparePatch "01-load-progress.diff" "sha256-UTmnBS5hQjIL3eXDZc8RBDNJunLlkqJWH20LpXNiGRQ=")
     (preparePatch "02-clip-log.diff" "sha256-rMWbl3QgrPlhisTeHwD7EnGRJyOhLB4UeS7rqa0tdXM=")
-    (preparePatch "03-load_exception.diff" "sha256-0XfMtMyg17oihqSFDBakBtAF0JwhsR188D+cOodgvDk=")
-    (preparePatch "04-metal.diff" "sha256-Ne8J9R8NndUosSK0qoMvFfKNwqV5xhhce1nSoYrZo7Y=")
-    (preparePatch "05-default-pretokenizer.diff" "sha256-JnCmFzAkmuI1AqATG3jbX7nGIam4hdDKqqbG5oh7h70=")
-    (preparePatch "06-qwen2.diff" "sha256-nMtoAQUsjYuJv45uTlz8r/K1oF5NUsc75SnhgfSkE30=")
-    (preparePatch "07-gemma.diff" "sha256-dKJrRvg/XC6xtwxLHZ7lFkLNMwT8Ugmd5xRPuKQDXvU=")
+    (preparePatch "03-load_exception.diff" "sha256-NJkT/k8Mf8HcEMb0XkaLmyUNKV3T+384JRPnmwDI/sk=")
+    (preparePatch "04-metal.diff" "sha256-bPBCfoT3EjZPjWKfCzh0pnCUbM/fGTj37yOaQr+QxQ4=")
+    (preparePatch "05-default-pretokenizer.diff" "sha256-PQ0DgfzycUQ8t6S6/yjsMHHx/nFJ0w8AH6afv5Po89w=")
+    (preparePatch "06-embeddings.diff" "sha256-lqg2SI0OapD9LCoAG6MJW6HIHXEmCTv7P75rE9yq/Mo=")
+    (preparePatch "07-clip-unicode.diff" "sha256-1qMJoXhDewxsqPbmi+/7xILQfGaybZDyXc5eH0winL8=")
+    (preparePatch "08-pooling.diff" "sha256-7meKWbr06lbVrtxau0AU9BwJ88Z9svwtDXhmHI+hYBk=")
+    (preparePatch "09-lora.diff" "sha256-tNtI3WHHjBq+PJZGJCBsXHa15dlNJeJm+IiaUbFC0LE=")
+    (preparePatch "11-phi3-sliding-window.diff" "sha256-VbcR4SLa9UXoh8Jq/bPVBerxfg68JZyWALRs7fz7hEs=")
   ];
 
-  preparePatch = patch: hash: fetchpatch {
-    url = "file://${src}/llm/patches/${patch}";
-    inherit hash;
-    stripLen = 1;
-    extraPrefix = "llm/llama.cpp/";
-  };
-
+  preparePatch =
+    patch: hash:
+    fetchpatch {
+      url = "file://${src}/llm/patches/${patch}";
+      inherit hash;
+      stripLen = 1;
+      extraPrefix = "llm/llama.cpp/";
+    };
 
-  accelIsValid = builtins.elem acceleration [ null false "rocm" "cuda" ];
-  validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport)
-    (lib.concatStrings [
-      "both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
-      "but they are mutually exclusive; falling back to cpu"
-    ])
-    (!(config.rocmSupport && config.cudaSupport));
-  shouldEnable = assert accelIsValid;
-    mode: fallback:
-      (acceleration == mode)
-      || (fallback && acceleration == null && validateFallback);
+  validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport) (lib.concatStrings [
+    "both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
+    "but they are mutually exclusive; falling back to cpu"
+  ]) (!(config.rocmSupport && config.cudaSupport));
+  shouldEnable =
+    mode: fallback: (acceleration == mode) || (fallback && acceleration == null && validateFallback);
 
   rocmRequested = shouldEnable "rocm" config.rocmSupport;
   cudaRequested = shouldEnable "cuda" config.cudaSupport;
@@ -83,7 +90,6 @@ let
   enableRocm = rocmRequested && stdenv.isLinux;
   enableCuda = cudaRequested && stdenv.isLinux;
 
-
   rocmLibs = [
     rocmPackages.clr
     rocmPackages.hipblas
@@ -93,9 +99,7 @@ let
     rocmPackages.rocm-device-libs
     rocmPackages.rocm-smi
   ];
-  rocmClang = linkFarm "rocm-clang" {
-    llvm = rocmPackages.llvm.clang;
-  };
+  rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };
   rocmPath = buildEnv {
     name = "rocm-path";
     paths = rocmLibs ++ [ rocmClang ];
@@ -111,117 +115,132 @@ let
     ];
   };
 
-  appleFrameworks = darwin.apple_sdk_11_0.frameworks;
-  metalFrameworks = [
-    appleFrameworks.Accelerate
-    appleFrameworks.Metal
-    appleFrameworks.MetalKit
-    appleFrameworks.MetalPerformanceShaders
+  metalFrameworks = with darwin.apple_sdk_11_0.frameworks; [
+    Accelerate
+    Metal
+    MetalKit
+    MetalPerformanceShaders
   ];
 
-  wrapperOptions = [
-    # ollama embeds llama-cpp binaries which actually run the ai models
-    # these llama-cpp binaries are unaffected by the ollama binary's DT_RUNPATH
-    # LD_LIBRARY_PATH is temporarily required to use the gpu
-    # until these llama-cpp binaries can have their runpath patched
-    "--suffix LD_LIBRARY_PATH : '${addDriverRunpath.driverLink}/lib'"
-  ] ++ lib.optionals enableRocm [
-    "--suffix LD_LIBRARY_PATH : '${rocmPath}/lib'"
-    "--set-default HIP_PATH '${rocmPath}'"
-  ];
+  wrapperOptions =
+    [
+      # ollama embeds llama-cpp binaries which actually run the ai models
+      # these llama-cpp binaries are unaffected by the ollama binary's DT_RUNPATH
+      # LD_LIBRARY_PATH is temporarily required to use the gpu
+      # until these llama-cpp binaries can have their runpath patched
+      "--suffix LD_LIBRARY_PATH : '${addDriverRunpath.driverLink}/lib'"
+    ]
+    ++ lib.optionals enableRocm [
+      "--suffix LD_LIBRARY_PATH : '${rocmPath}/lib'"
+      "--set-default HIP_PATH '${rocmPath}'"
+    ];
   wrapperArgs = builtins.concatStringsSep " " wrapperOptions;
 
-
   goBuild =
-    if enableCuda then
-      buildGo122Module.override { stdenv = overrideCC stdenv gcc12; }
-    else
-      buildGo122Module;
+    if enableCuda then buildGoModule.override { stdenv = overrideCC stdenv gcc12; } else buildGoModule;
   inherit (lib) licenses platforms maintainers;
 in
-goBuild ((lib.optionalAttrs enableRocm {
-  ROCM_PATH = rocmPath;
-  CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
-}) // {
-  inherit pname version src vendorHash;
-
-  nativeBuildInputs = [
-    cmake
-  ] ++ lib.optionals enableRocm [
-    rocmPackages.llvm.bintools
-  ] ++ lib.optionals enableCuda [
-    cudaPackages.cuda_nvcc
-  ] ++ lib.optionals (enableRocm || enableCuda) [
-    makeWrapper
-    autoAddDriverRunpath
-  ] ++ lib.optionals stdenv.isDarwin
-    metalFrameworks;
-
-  buildInputs = lib.optionals enableRocm
-    (rocmLibs ++ [ libdrm ])
-  ++ lib.optionals enableCuda [
-    cudaPackages.cuda_cudart
-    cudaPackages.libcublas
-  ] ++ lib.optionals stdenv.isDarwin
-    metalFrameworks;
-
-  patches = [
-    # disable uses of `git` in the `go generate` script
-    # ollama's build script assumes the source is a git repo, but nix removes the git directory
-    # this also disables necessary patches contained in `ollama/llm/patches/`
-    # those patches are added to `llamacppPatches`, and reapplied here in the patch phase
-    ./disable-git.patch
-    # disable a check that unnecessarily exits compilation during rocm builds
-    # since `rocmPath` is in `LD_LIBRARY_PATH`, ollama uses rocm correctly
-    ./disable-lib-check.patch
-  ] ++ llamacppPatches;
-  postPatch = ''
-    # replace inaccurate version number with actual release version
-    substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
-  '';
-  preBuild = ''
-    # disable uses of `git`, since nix removes the git directory
-    export OLLAMA_SKIP_PATCHING=true
-    # build llama.cpp libraries for ollama
-    go generate ./...
-  '';
-  postFixup = ''
-    # the app doesn't appear functional at the moment, so hide it
-    mv "$out/bin/app" "$out/bin/.ollama-app"
-  '' + lib.optionalString (enableRocm || enableCuda) ''
-    # expose runtime libraries necessary to use the gpu
-    wrapProgram "$out/bin/ollama" ${wrapperArgs}
-  '';
-
-  ldflags = [
-    "-s"
-    "-w"
-    "-X=github.com/ollama/ollama/version.Version=${version}"
-    "-X=github.com/ollama/ollama/server.mode=release"
-  ];
+goBuild (
+  (lib.optionalAttrs enableRocm {
+    ROCM_PATH = rocmPath;
+    CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
+  })
+  // (lib.optionalAttrs enableCuda { CUDA_LIB_DIR = "${cudaToolkit}/lib"; })
+  // {
+    inherit
+      pname
+      version
+      src
+      vendorHash
+      ;
+
+    nativeBuildInputs =
+      [ cmake ]
+      ++ lib.optionals enableRocm [ rocmPackages.llvm.bintools ]
+      ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ]
+      ++ lib.optionals (enableRocm || enableCuda) [
+        makeWrapper
+        autoAddDriverRunpath
+      ]
+      ++ lib.optionals stdenv.isDarwin metalFrameworks;
+
+    buildInputs =
+      lib.optionals enableRocm (rocmLibs ++ [ libdrm ])
+      ++ lib.optionals enableCuda [
+        cudaPackages.cuda_cudart
+        cudaPackages.cuda_cccl
+        cudaPackages.libcublas
+      ]
+      ++ lib.optionals stdenv.isDarwin metalFrameworks;
+
+    patches = [
+      # disable uses of `git` in the `go generate` script
+      # ollama's build script assumes the source is a git repo, but nix removes the git directory
+      # this also disables necessary patches contained in `ollama/llm/patches/`
+      # those patches are added to `llamacppPatches`, and reapplied here in the patch phase
+      ./disable-git.patch
+      # disable a check that unnecessarily exits compilation during rocm builds
+      # since `rocmPath` is in `LD_LIBRARY_PATH`, ollama uses rocm correctly
+      ./disable-lib-check.patch
+    ] ++ llamacppPatches;
+    postPatch = ''
+      # replace inaccurate version number with actual release version
+      substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
+    '';
+    preBuild = ''
+      # disable uses of `git`, since nix removes the git directory
+      export OLLAMA_SKIP_PATCHING=true
+      # build llama.cpp libraries for ollama
+      go generate ./...
+    '';
+    postFixup =
+      ''
+        # the app doesn't appear functional at the moment, so hide it
+        mv "$out/bin/app" "$out/bin/.ollama-app"
+      ''
+      + lib.optionalString (enableRocm || enableCuda) ''
+        # expose runtime libraries necessary to use the gpu
+        wrapProgram "$out/bin/ollama" ${wrapperArgs}
+      '';
+
+    ldflags = [
+      "-s"
+      "-w"
+      "-X=github.com/ollama/ollama/version.Version=${version}"
+      "-X=github.com/ollama/ollama/server.mode=release"
+    ];
 
-  passthru.tests = {
-    inherit ollama;
-    service = nixosTests.ollama;
-    version = testers.testVersion {
-      inherit version;
-      package = ollama;
+    passthru.tests =
+      {
+        inherit ollama;
+        version = testers.testVersion {
+          inherit version;
+          package = ollama;
+        };
+      }
+      // lib.optionalAttrs stdenv.isLinux {
+        inherit ollama-rocm ollama-cuda;
+        service = nixosTests.ollama;
+        service-cuda = nixosTests.ollama-cuda;
+        service-rocm = nixosTests.ollama-rocm;
+      };
+
+    meta = {
+      description =
+        "Get up and running with large language models locally"
+        + lib.optionalString rocmRequested ", using ROCm for AMD GPU acceleration"
+        + lib.optionalString cudaRequested ", using CUDA for NVIDIA GPU acceleration";
+      homepage = "https://github.com/ollama/ollama";
+      changelog = "https://github.com/ollama/ollama/releases/tag/v${version}";
+      license = licenses.mit;
+      platforms = if (rocmRequested || cudaRequested) then platforms.linux else platforms.unix;
+      mainProgram = "ollama";
+      maintainers = with maintainers; [
+        abysssol
+        dit7ya
+        elohmeier
+        roydubnium
+      ];
     };
-  } // lib.optionalAttrs stdenv.isLinux {
-    inherit ollama-rocm ollama-cuda;
-  };
-
-  meta = {
-    description = "Get up and running with large language models locally"
-      + lib.optionalString rocmRequested ", using ROCm for AMD GPU acceleration"
-      + lib.optionalString cudaRequested ", using CUDA for NVIDIA GPU acceleration";
-    homepage = "https://github.com/ollama/ollama";
-    changelog = "https://github.com/ollama/ollama/releases/tag/v${version}";
-    license = licenses.mit;
-    platforms =
-      if (rocmRequested || cudaRequested) then platforms.linux
-      else platforms.unix;
-    mainProgram = "ollama";
-    maintainers = with maintainers; [ abysssol dit7ya elohmeier roydubnium ];
-  };
-})
+  }
+)