diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index f4e48cafbe025d..9197c82b515cfb 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -102,7 +102,7 @@ jobs:
--branch ${{ github.head_ref || github.ref_name }} \
--commit ${{ github.sha }} \
--scenario script.js \
- --duration 2m \
+ --duration 1m \
--hf-repo ggml-org/models \
--hf-file phi-2/ggml-model-q4_0.gguf \
--model-path-prefix /models \
@@ -122,6 +122,21 @@ jobs:
echo KV_CACHE_USAGE_RATIO_=${KV_CACHE_USAGE_RATIO//
/\n} >> $GITHUB_ENV
echo REQUESTS_PROCESSING_=${REQUESTS_PROCESSING//
/\n} >> $GITHUB_ENV
+ - name: Escape mermaid newlines  # display label only; this step runs no benchmark, it rewrites env values
+ id: server_bench_mermaid
+ env:  # hand the current values from the env context to this step's shell
+ PROMPT_TOKENS_SECONDS: ${{ env.PROMPT_TOKENS_SECONDS }}
+ PREDICTED_TOKENS_SECONDS: ${{ env.PREDICTED_TOKENS_SECONDS }}
+ KV_CACHE_USAGE_RATIO: ${{ env.KV_CACHE_USAGE_RATIO }}
+ REQUESTS_PROCESSING: ${{ env.REQUESTS_PROCESSING }}
+ run: |
+ set -eux  # stop on errors and unset variables; trace every command in the log
+
+ echo PROMPT_TOKENS_SECONDS_=${PROMPT_TOKENS_SECONDS//
/\n} >> $GITHUB_ENV  # appends NAME_=value with every newline substituted; NOTE(review): expansion is unquoted, confirm the replacement text survives as intended
+ echo PREDICTED_TOKENS_SECONDS_=${PREDICTED_TOKENS_SECONDS//
/\n} >> $GITHUB_ENV
+ echo KV_CACHE_USAGE_RATIO_=${KV_CACHE_USAGE_RATIO//
/\n} >> $GITHUB_ENV
+ echo REQUESTS_PROCESSING_=${REQUESTS_PROCESSING//
/\n} >> $GITHUB_ENV  # NOTE(review): the step just above this one already ends with these same four echo lines; confirm both are needed
+
- uses: actions/upload-artifact@v4
with:
name: benchmark-results
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
index a12eaf2a3d4ab7..bb45a9ea8c35ef 100644
--- a/examples/server/bench/bench.py
+++ b/examples/server/bench/bench.py
@@ -140,7 +140,7 @@ def main(args_in: list[str] | None = None) -> None:
plt.ylabel(ylabel, fontsize=22)
plt.xlabel(xlabel, fontsize=14, wrap=True)
plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
- plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
+ plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m-%d %H:%M:%S"))
plt.gcf().autofmt_xdate()
# Remove borders
@@ -157,7 +157,7 @@ def main(args_in: list[str] | None = None) -> None:
mermaid = f"""```mermaid
xychart-beta
title "{title}"
- x-axis "{xlabel}" ["{'", "'.join([datetime.fromtimestamp(int(ts)).strftime("%Y%m%d %H:%M:%S") for ts in timestamps])}"]
+ x-axis "{xlabel}" ["{'", "'.join([datetime.fromtimestamp(int(ts)).strftime("%Y-%m-%d %H:%M:%S") for ts in timestamps])}"]
y-axis "{ylabel}"
line [{', '.join([str(round(float(value))) for value in metric_values])}]
```