Skip to content

Commit

Permalink
support multi models
Browse files Browse the repository at this point in the history
Signed-off-by: Kunshang Ji <[email protected]>
Signed-off-by: Chendi Xue <[email protected]>
Signed-off-by: Chendi.Xue <[email protected]>
  • Loading branch information
jikunshang authored and xuechendi committed Dec 2, 2024
1 parent 338a00a commit 0915219
Show file tree
Hide file tree
Showing 9 changed files with 211 additions and 1,760 deletions.
39 changes: 39 additions & 0 deletions scripts/run_multi_models.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/bin/bash

# Start the multi-model server first, for example:
# VLLM_SKIP_WARMUP=true python3 -m \
# vllm.entrypoints.openai.mm_api_server \
# --models mistralai/Mistral-7B-Instruct-v0.3 meta-llama/Llama-3.1-8B-Instruct \
# --port 8080 --device hpu --dtype bfloat16 \
# --gpu-memory-utilization=0.3 --use-v2-block-manager --max-model-len 4096 > multi_models.log 2>&1 &


# Settings shared by every benchmark run launched below.
bs=128        # passed as --num-prompts
in_len=1024   # passed as --sonnet-input-len
out_len=1024  # passed as --sonnet-output-len

#######################################
# Launch benchmarks/benchmark_serving.py for one model as a background job.
# Globals:   bs, in_len, out_len (read)
# Arguments: $1 - model name passed to --model
#            $2 - log file that receives the job's stdout and stderr
# Outputs:   nothing on the terminal; all output goes to the log file
# Returns:   0 once the job is started (the job's own status is not checked)
#######################################
run_benchmark() {
  local model=$1
  local log_file=$2

  # Expansions are quoted so a value containing whitespace or glob
  # characters is still passed as a single argument.
  python benchmarks/benchmark_serving.py \
    --backend vllm \
    --model "${model}" \
    --dataset-name sonnet \
    --dataset-path benchmarks/sonnet.txt \
    --request-rate 512 \
    --num-prompts "${bs}" \
    --port 8080 \
    --sonnet-input-len "${in_len}" \
    --sonnet-output-len "${out_len}" \
    --sonnet-prefix-len 100 \
    --save-result > "${log_file}" 2>&1 &
}

# Both jobs are started back to back against the same port so that they run
# concurrently. The script does not wait for them; they keep running after
# it returns, and their progress is visible only in the log files.
run_benchmark mistralai/Mistral-7B-Instruct-v0.3 mistral-sonnet-1.log
run_benchmark meta-llama/Llama-3.1-8B-Instruct llama-sonnet-1.log
Loading

0 comments on commit 0915219

Please sign in to comment.