From 2c3b19205728a46c19bea75091cf4fad0b8f6eda Mon Sep 17 00:00:00 2001
From: Pierrick HYMBERT <pierrick.hymbert@gmail.com>
Date: Mon, 25 Mar 2024 21:11:23 +0100
Subject: [PATCH] server: bench: init

---
 .github/workflows/bench.yml    | 16 +++++++---------
 examples/server/bench/bench.py |  4 ++--
 2 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 307451be794f06..788e7de3587431 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -106,7 +106,7 @@ jobs:
               --branch ${{ github.head_ref || github.ref_name }} \
               --commit ${{ github.sha }} \
               --scenario script.js \
-              --duration 1m \
+              --duration 10m \
               --hf-repo ggml-org/models	 \
               --hf-file phi-2/ggml-model-q4_0.gguf \
               --model-path-prefix /models \
@@ -120,8 +120,6 @@ jobs:
               --max-tokens 2048
 
           cat results.github.env >> $GITHUB_ENV
-          
-          cat results.github.env
 
       - name: Commit status
         uses: Sibz/github-status-action@v1
@@ -151,17 +149,17 @@ jobs:
         with:
           message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
           message: |
-            📈 **llama.cpp** server benchmark for _${{ github.job }}_ on **${{ env.RUNNER_LABEL }}**: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+            📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
             <p align="center">
-                <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
-                <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
+                <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
+                <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
             </p>
             <details>
                 <summary>Details</summary>
                 <p align="center">
-                    <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
-                    <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
-                    <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
+                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
+                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
+                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
                 </p>
             </detail>
 
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
index 286104b9ca987e..c0e08ae1972272 100644
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@@ -122,8 +122,8 @@ def main(args_in: list[str] | None = None) -> None:
                 plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
                 plt.yticks(fontsize=12, alpha=.7)
 
-                plt.title(f"llama.cpp {args.name} on {args.runner_label} {iterations} iterations\n"
-                          f"duration={args.duration}",
+                plt.title(f"llama.cpp {args.name} on {args.runner_label}\n"
+                          f"duration={args.duration} {iterations} iterations",
                           fontsize=14, wrap=True)
                 plt.grid(axis='both', alpha=.3)
                 plt.ylabel(f"llamacpp:{metric}", fontsize=22)