Switch to llama-simple-chat
llama-simple-chat is a new chat program in llama.cpp that is much
simpler than the llama-cli setup we were using. It doesn't have the
debug/verbose output problem, it's only a few hundred lines of code,
and it generally seems higher quality for a simple chatbot.
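
For a sense of the simplification, the assembled command shrinks roughly as follows (a sketch based on the model.py hunk below; the model path and prompt are placeholders):

    # before: llama-cli needed prompt-suppression and conversation flags
    llama-cli -m /path/to/model --in-prefix "" --in-suffix "" --no-display-prompt -p "<prompt>" -cnv

    # after: the binary and the model path suffice
    llama-simple-chat -m /path/to/model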

Signed-off-by: Eric Curtin <[email protected]>
ericcurtin committed Nov 14, 2024
1 parent 1b5eb20 commit 1db401d
Showing 4 changed files with 5 additions and 16 deletions.
2 changes: 1 addition & 1 deletion container-images/cuda/Containerfile
@@ -1,7 +1,7 @@
 # Base image with CUDA for compilation
 FROM docker.io/nvidia/cuda:12.6.2-devel-ubi9 AS builder
 
-ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
+ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=f19463ece2d43fd0b605dc513d8800eeb4e2315e
 
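Note that the whisper.cpp pin above carries a Renovate hint while the llama.cpp pin does not. If llama.cpp were to be auto-bumped the same way (an assumption, not part of this commit), the comment would presumably mirror the existing one:

    # renovate: datasource=git-refs depName=ggerganov/llama.cpp packageName=https://github.com/ggerganov/llama.cpp gitRef=master versioning=loose type=digest
    ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50

The same pattern would apply to the ramalama Containerfile below.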
2 changes: 1 addition & 1 deletion container-images/ramalama/Containerfile
@@ -1,6 +1,6 @@
 FROM registry.access.redhat.com/ubi9/ubi:9.4-1214.1729773476
 
-ARG LLAMA_CPP_SHA=1329c0a75e6a7defc5c380eaf80d8e0f66d7da78
+ARG LLAMA_CPP_SHA=af148c9386da825a60c7038549c121c35ca56b50
 # renovate: datasource=git-refs depName=ggerganov/whisper.cpp packageName=https://github.com/ggerganov/whisper.cpp gitRef=master versioning=loose type=digest
 ARG WHISPER_CPP_SHA=f19463ece2d43fd0b605dc513d8800eeb4e2315e
 
15 changes: 2 additions & 13 deletions ramalama/model.py
@@ -267,19 +267,8 @@ def run(self, args):
         if not args.container:
             exec_model_path = model_path
 
-        exec_args = ["llama-cli", "-m", exec_model_path, "--in-prefix", "", "--in-suffix", ""]
-
-        if not args.debug:
-            exec_args += ["--no-display-prompt"]
-
-        exec_args += [
-            "-p",
-            prompt,
-        ] + self.common_params
-
-        if not args.ARGS and sys.stdin.isatty():
-            exec_args.append("-cnv")
-
+        exec_args = ["llama-simple-chat", "-m", exec_model_path]
+        exec_args += self.common_params
         if args.gpu:
             exec_args.extend(self.gpu_args())
 
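With the new code path, the full command can be previewed without launching anything via the dry-run flag that the system test below exercises (a sketch; "tiny" stands in for any model name, and the printed path depends on the resolved model):

    ramalama --dryrun run tiny
    # prints roughly: llama-simple-chat -m /path/to/model ...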
2 changes: 1 addition & 1 deletion test/system/030-run.bats
@@ -29,7 +29,7 @@ load helpers
         is "$output" ".*${image} /bin/sh -c" "verify image name"
     else
         run_ramalama --dryrun run ${model}
-        is "$output" 'llama-cli -m /path/to/model --in-prefix --in-suffix --no-display-prompt -p.*' "dryrun correct"
+        is "$output" 'llama-simple-chat -m /path/to/model.*' "dryrun correct"
 
         run_ramalama 1 run --name foobar tiny
         is "${lines[0]}" "Error: --nocontainer and --name options conflict. --name requires a container." "conflict between nocontainer and --name line"
