From 54818d4cbc1976d09f7dd510e4fa13e5f8ce06e3 Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT
Date: Sun, 24 Mar 2024 16:37:25 +0100
Subject: [PATCH] server: bench: init

---
 .github/workflows/bench.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 445bc34940e5cf..eef9f7d279c91d 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -55,7 +55,7 @@ jobs:
             -DLLAMA_CUBLAS=ON \
             -DCUDAToolkit_ROOT=/usr/local/cuda \
             -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
-            -DCMAKE_CUDA_ARCHITECTURES=80 \
+            -DCMAKE_CUDA_ARCHITECTURES=75 \
             -DLLAMA_FATAL_WARNINGS=OFF \
             -DLLAMA_ALL_WARNINGS=OFF \
             -DCMAKE_BUILD_TYPE=Release;
@@ -77,7 +77,7 @@ jobs:
         id: server_bench
         run: |
           build/bin/server \
-            --host localhost \
+            --host 0.0.0.0 \
             --port 8080 \
             --hf-repo ggml-org/models \
             --hf-file phi-2/ggml-model-q4_0.gguf \
@@ -95,5 +95,9 @@ jobs:
             sleep 0.1
           done
 
+          while [[ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8080)" != "200" ]]; do
+            sleep 0.5;
+          done
+
           cd examples/server/bench
           ../../../k6 run script.js --duration 10m --iterations 500 --vus 8