diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index f4e48cafbe025d..9197c82b515cfb 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -102,7 +102,7 @@ jobs: --branch ${{ github.head_ref || github.ref_name }} \ --commit ${{ github.sha }} \ --scenario script.js \ - --duration 2m \ + --duration 1m \ --hf-repo ggml-org/models \ --hf-file phi-2/ggml-model-q4_0.gguf \ --model-path-prefix /models \ @@ -122,6 +122,21 @@ jobs: echo KV_CACHE_USAGE_RATIO_=${KV_CACHE_USAGE_RATIO//
/\n} >> $GITHUB_ENV echo REQUESTS_PROCESSING_=${REQUESTS_PROCESSING//
/\n} >> $GITHUB_ENV + - name: Server bench + id: server_bench_mermaid + env: + PROMPT_TOKENS_SECONDS: ${{ env.PROMPT_TOKENS_SECONDS }} + PREDICTED_TOKENS_SECONDS: ${{ env.PREDICTED_TOKENS_SECONDS }} + KV_CACHE_USAGE_RATIO: ${{ env.KV_CACHE_USAGE_RATIO }} + REQUESTS_PROCESSING: ${{ env.REQUESTS_PROCESSING }} + run: | + set -eux + + echo PROMPT_TOKENS_SECONDS_=${PROMPT_TOKENS_SECONDS//
/\n} >> $GITHUB_ENV + echo PREDICTED_TOKENS_SECONDS_=${PREDICTED_TOKENS_SECONDS//
/\n} >> $GITHUB_ENV + echo KV_CACHE_USAGE_RATIO_=${KV_CACHE_USAGE_RATIO//
/\n} >> $GITHUB_ENV + echo REQUESTS_PROCESSING_=${REQUESTS_PROCESSING//
/\n} >> $GITHUB_ENV + - uses: actions/upload-artifact@v4 with: name: benchmark-results diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py index a12eaf2a3d4ab7..bb45a9ea8c35ef 100644 --- a/examples/server/bench/bench.py +++ b/examples/server/bench/bench.py @@ -140,7 +140,7 @@ def main(args_in: list[str] | None = None) -> None: plt.ylabel(ylabel, fontsize=22) plt.xlabel(xlabel, fontsize=14, wrap=True) plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator()) - plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S")) + plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m-%d %H:%M:%S")) plt.gcf().autofmt_xdate() # Remove borders @@ -157,7 +157,7 @@ def main(args_in: list[str] | None = None) -> None: mermaid = f"""```mermaid xychart-beta title "{title}" - x-axis "{xlabel}" ["{'", "'.join([datetime.fromtimestamp(int(ts)).strftime("%Y%m%d %H:%M:%S") for ts in timestamps])}"] + x-axis "{xlabel}" ["{'", "'.join([datetime.fromtimestamp(int(ts)).strftime("%Y-%m-%d %H:%M:%S") for ts in timestamps])}"] y-axis "{ylabel}" line [{', '.join([str(round(float(value))) for value in metric_values])}] ```