From 2c3b19205728a46c19bea75091cf4fad0b8f6eda Mon Sep 17 00:00:00 2001 From: Pierrick HYMBERT Date: Mon, 25 Mar 2024 21:11:23 +0100 Subject: [PATCH] server: bench: init --- .github/workflows/bench.yml | 16 +++++++--------- examples/server/bench/bench.py | 4 ++-- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 307451be794f06..788e7de3587431 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -106,7 +106,7 @@ jobs: --branch ${{ github.head_ref || github.ref_name }} \ --commit ${{ github.sha }} \ --scenario script.js \ - --duration 1m \ + --duration 10m \ --hf-repo ggml-org/models \ --hf-file phi-2/ggml-model-q4_0.gguf \ --model-path-prefix /models \ @@ -120,8 +120,6 @@ jobs: --max-tokens 2048 cat results.github.env >> $GITHUB_ENV - - cat results.github.env - name: Commit status uses: Sibz/github-status-action@v1 @@ -151,17 +149,17 @@ jobs: with: message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }} message: | - 📈 **llama.cpp** server benchmark for _${{ github.job }}_ on **${{ env.RUNNER_LABEL }}**: **${{ env.BENCH_ITERATIONS}} iterations** 🚀 + 📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀

- prompt_tokens_seconds - predicted_tokens_seconds + prompt_tokens_seconds + predicted_tokens_seconds

Details

- kv_cache_usage_ratio - requests_processing - requests_deferred + kv_cache_usage_ratio + requests_processing + requests_deferred

diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py index 286104b9ca987e..c0e08ae1972272 100644 --- a/examples/server/bench/bench.py +++ b/examples/server/bench/bench.py @@ -122,8 +122,8 @@ def main(args_in: list[str] | None = None) -> None: plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7) plt.yticks(fontsize=12, alpha=.7) - plt.title(f"llama.cpp {args.name} on {args.runner_label} {iterations} iterations\n" - f"duration={args.duration}", + plt.title(f"llama.cpp {args.name} on {args.runner_label}\n" + f"duration={args.duration} {iterations} iterations", fontsize=14, wrap=True) plt.grid(axis='both', alpha=.3) plt.ylabel(f"llamacpp:{metric}", fontsize=22)