1 files changed, 28 insertions, 2 deletions
diff --git a/pkgs/by-name/lo/local-ai/tests.nix b/pkgs/by-name/lo/local-ai/tests.nix
index 7cebc6fff9387..5740362f24efd 100644
--- a/pkgs/by-name/lo/local-ai/tests.nix
+++ b/pkgs/by-name/lo/local-ai/tests.nix
@@ -6,6 +6,7 @@
 , writers
 , symlinkJoin
 , jq
+, prom2json
 }:
 let
   common-config = { config, ... }: {
@@ -14,6 +15,7 @@ let
       enable = true;
       package = self;
       threads = config.virtualisation.cores;
+      logLevel = "debug";
     };
   };
 
@@ -26,7 +28,7 @@ in
     command = "local-ai --help";
   };
 
-  health = testers.runNixOSTest ({ config, ... }: {
+  health = testers.runNixOSTest {
     name = self.name + "-health";
     nodes.machine = common-config;
     testScript =
@@ -36,8 +38,11 @@ in
       ''
         machine.wait_for_open_port(${port})
         machine.succeed("curl -f http://localhost:${port}/readyz")
+
+        machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+        machine.copy_from_vm("metrics.json")
       '';
-  });
+  };
 
   # https://localai.io/features/embeddings/#bert-embeddings
   bert =
@@ -78,8 +83,13 @@ in
           machine.succeed("curl -f http://localhost:${port}/readyz")
           machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
+
           machine.succeed("curl -f http://localhost:${port}/embeddings --json @${writers.writeJSON "request.json" requests.request} --output embeddings.json")
+          machine.copy_from_vm("embeddings.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .model == \"${model}\"' embeddings.json")
+
+          machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+          machine.copy_from_vm("metrics.json")
         '';
     };
 
@@ -92,6 +102,7 @@ in
       # https://localai.io/advanced/#full-config-model-file-reference
       model-configs.${model} = rec {
         context_size = 8192;
+        backend = "llama-cpp";
         parameters = {
           # https://huggingface.co/lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF
           # https://ai.meta.com/blog/meta-llama-3/
@@ -157,6 +168,8 @@ in
         virtualisation.cores = 4;
         virtualisation.memorySize = 8192;
         services.local-ai.models = models;
+        # TODO: Add a test case for parallel requests
+        services.local-ai.parallelRequests = 2;
       };
       passthru = { inherit models requests; };
       testScript =
@@ -170,16 +183,22 @@ in
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .data[].id == \"${model}\"' models.json")
 
           machine.succeed("curl -f http://localhost:${port}/v1/chat/completions --json @${writers.writeJSON "request-chat-completions.json" requests.chat-completions} --output chat-completions.json")
+          machine.copy_from_vm("chat-completions.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"chat.completion\"' chat-completions.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .choices | first.message.content | tonumber == 3' chat-completions.json")
 
           machine.succeed("curl -f http://localhost:${port}/v1/edits --json @${writers.writeJSON "request-edit-completions.json" requests.edit-completions} --output edit-completions.json")
+          machine.copy_from_vm("edit-completions.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .object == \"edit\"' edit-completions.json")
           machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString requests.edit-completions.max_tokens}' edit-completions.json")
 
           machine.succeed("curl -f http://localhost:${port}/v1/completions --json @${writers.writeJSON "request-completions.json" requests.completions} --output completions.json")
+          machine.copy_from_vm("completions.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .object ==\"text_completion\"' completions.json")
           machine.succeed("${jq}/bin/jq --exit-status '.usage.completion_tokens | debug == ${toString model-configs.${model}.parameters.max_tokens}' completions.json")
+
+          machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+          machine.copy_from_vm("metrics.json")
         '';
     };
 
@@ -240,9 +259,16 @@ in
           machine.succeed("curl -f http://localhost:${port}/readyz")
           machine.succeed("curl -f http://localhost:${port}/v1/models --output models.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug' models.json")
+
           machine.succeed("curl -f http://localhost:${port}/tts --json @${writers.writeJSON "request.json" requests.request} --output out.wav")
+          machine.copy_from_vm("out.wav")
+
           machine.succeed("curl -f http://localhost:${port}/v1/audio/transcriptions --header 'Content-Type: multipart/form-data' --form file=@out.wav --form model=${model-stt} --output transcription.json")
+          machine.copy_from_vm("transcription.json")
           machine.succeed("${jq}/bin/jq --exit-status 'debug | .segments | first.text == \"${requests.request.input}\"' transcription.json")
+
+          machine.succeed("${prom2json}/bin/prom2json http://localhost:${port}/metrics > metrics.json")
+          machine.copy_from_vm("metrics.json")
         '';
     };
 }