forked from vllm-project/vllm
Add basic CI checks for enc dec models (#741)
Co-authored-by: Karol Damaszke <[email protected]>
Showing 9 changed files with 219 additions and 0 deletions.
A test-requirements file gains one pin (the capture omits the file's path):

```
@@ -1,3 +1,4 @@
lm_eval
pytest
tokenizers<0.20.2
transformers<=4.46.3
```
.jenkins/vision/configs/Meta-Llama-3.2-11B-Vision-Instruct-mss.yaml (new file, 6 additions):
```yaml
model_name: "/mnt/weka/data/pytorch/llama3.2/Meta-Llama-3.2-11B-Vision-Instruct"
dtype: "bfloat16"
max_model_len: 1024
max_num_seqs: 32
num_prompts: 4
num_scheduler_steps: 10
```
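The "-mss" variant differs from the base config below only by `num_scheduler_steps: 10`, which exercises vLLM's multi-step scheduling path ("mss" presumably stands for multi-step scheduling). As a rough sketch of how these keys are consumed, mirroring `launch_enc_dec_model()` in the test harness further down (the defaults shown are the harness's own):

```python
import yaml
from vllm import LLM

# Load one of the CI configs added above and forward its keys to vLLM.
with open(".jenkins/vision/configs/"
          "Meta-Llama-3.2-11B-Vision-Instruct-mss.yaml") as f:
    config = yaml.safe_load(f)

llm = LLM(
    model=config["model_name"],
    dtype=config.get("dtype", "bfloat16"),
    max_model_len=config.get("max_model_len", 4096),
    max_num_seqs=config.get("max_num_seqs", 128),
    num_scheduler_steps=config.get("num_scheduler_steps", 1),
)
```

The remaining key, `num_prompts`, is consumed by the test itself rather than the engine: it controls how many copies of the image prompt are batched into one `generate()` call.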
.jenkins/vision/configs/Meta-Llama-3.2-11B-Vision-Instruct.yaml (new file, 5 additions):
```yaml
model_name: "/mnt/weka/data/pytorch/llama3.2/Meta-Llama-3.2-11B-Vision-Instruct"
dtype: "bfloat16"
max_model_len: 1024
max_num_seqs: 32
num_prompts: 4
```
Two new one-line list files follow (their paths are not shown in the capture); these appear to be the model lists that the runner script's -c option consumes, one config file name per line:

```
Meta-Llama-3.2-11B-Vision-Instruct-mss.yaml
```
```
Meta-Llama-3.2-11B-Vision-Instruct.yaml
```
(Binary file added; it cannot be displayed here. Presumably the data/cherry_blossom.jpg test image that the harness below loads.)
A new 71-line runner script (its path is not shown in the capture):

```bash
#!/bin/bash

usage() {
    echo
    echo "Runs simple request check on multimodal models using vllm"
    echo
    echo "usage: ${0} <options>"
    echo
    echo "  -c    - path to the test data config (e.g. configs/small-models.txt)"
    echo "  -t    - tensor parallel size"
    echo
}

SUCCESS=0

while getopts "c:t:" OPT; do
    case ${OPT} in
        c )
            CONFIG="$OPTARG"
            ;;
        t )
            TP_SIZE="$OPTARG"
            ;;
        \? )
            usage
            exit 1
            ;;
    esac
done

# Parse the list of configs: one config file name per line.
IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "$CONFIG"

for MODEL_CONFIG in "${MODEL_CONFIGS[@]}"
do
    LOCAL_SUCCESS=0

    echo "=== RUNNING MODEL: $MODEL_CONFIG WITH TP SIZE: $TP_SIZE ==="

    export TEST_DATA_FILE=$PWD/configs/${MODEL_CONFIG}
    export TP_SIZE=$TP_SIZE
    export PT_HPU_ENABLE_LAZY_COLLECTIVES=true
    export VLLM_SKIP_WARMUP=true
    export TQDM_BAR_FORMAT="{desc}: {percentage:3.0f}% {bar:10} | {n_fmt}/{total_fmt} [{elapsed}<{remaining}]"
    RANDOM_SUFFIX=$(tr -dc A-Za-z0-9 </dev/urandom | head -c 4; echo)
    JUNIT_FAMILY=""
    JUNIT_XML=""
    if [[ -n "$TEST_RESULTS_DIR" ]]; then
        LOG_DIR=$TEST_RESULTS_DIR
        LOG_FILENAME="test_${MODEL_CONFIG}_${RANDOM_SUFFIX}.xml"
        LOG_PATH="${LOG_DIR}/${LOG_FILENAME}"
        JUNIT_FAMILY="-o junit_family=xunit1"
        JUNIT_XML="--junitxml=${LOG_PATH}"
    fi
    # Left unquoted on purpose: the JUnit variables must word-split into
    # separate arguments, and empty values must disappear rather than be
    # passed to pytest as empty-string arguments.
    pytest -s test_enc_dec_model.py $JUNIT_FAMILY $JUNIT_XML || LOCAL_SUCCESS=$?

    if [[ $LOCAL_SUCCESS == 0 ]]; then
        echo "=== PASSED MODEL: ${MODEL_CONFIG} ==="
    else
        echo "=== FAILED MODEL: ${MODEL_CONFIG} ==="
    fi

    SUCCESS=$((SUCCESS + LOCAL_SUCCESS))

done

if [ "${SUCCESS}" -eq "0" ]; then
    exit 0
else
    exit 1
fi
```
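Assuming the script is saved as run_tests.sh under .jenkins/vision/ (the capture omits its real path, and the list file name below is likewise a placeholder), a typical invocation would look like:

```bash
cd .jenkins/vision

# Run every config named in the list file with tensor parallel size 1.
bash run_tests.sh -c configs/multimodal-models.txt -t 1

# Set TEST_RESULTS_DIR to also collect per-model JUnit XML reports.
TEST_RESULTS_DIR=/tmp/test-results bash run_tests.sh -c configs/multimodal-models.txt -t 1
```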
The new 120-line pytest harness, test_enc_dec_model.py, invoked by the runner above:

```python
import atexit
import os
from pathlib import Path

import yaml
from PIL import Image
from transformers import AutoTokenizer

from vllm import LLM, SamplingParams

TEST_DATA_FILE = os.environ.get(
    "TEST_DATA_FILE",
    ".jenkins/vision/configs/Meta-Llama-3.2-11B-Vision-Instruct.yaml")

TP_SIZE = int(os.environ.get("TP_SIZE", 1))


def fail_on_exit():
    os._exit(1)


def launch_enc_dec_model(config, question):
    model_name = config.get('model_name')
    dtype = config.get('dtype', 'bfloat16')
    max_num_seqs = config.get('max_num_seqs', 128)
    max_model_len = config.get('max_model_len', 4096)
    tensor_parallel_size = TP_SIZE
    num_scheduler_steps = config.get('num_scheduler_steps', 1)
    llm = LLM(
        model=model_name,
        dtype=dtype,
        tensor_parallel_size=tensor_parallel_size,
        num_scheduler_steps=num_scheduler_steps,
        max_model_len=max_model_len,
        max_num_seqs=max_num_seqs,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Build a chat prompt with one image slot followed by the text question.
    messages = [{
        "role": "user",
        "content": [{
            "type": "image"
        }, {
            "type": "text",
            "text": f"{question}"
        }]
    }]
    prompt = tokenizer.apply_chat_template(messages,
                                           add_generation_prompt=True,
                                           tokenize=False)
    return llm, prompt


def get_input():
    image = Image.open("data/cherry_blossom.jpg").convert("RGB")
    img_question = "What is the content of this image?"

    return {
        "image": image,
        "question": img_question,
    }


def get_current_gaudi_platform():
    # Inspired by: https://github.com/HabanaAI/Model-References/blob/a87c21f14f13b70ffc77617b9e80d1ec989a3442/PyTorch/computer_vision/classification/torchvision/utils.py#L274
    import habana_frameworks.torch.utils.experimental as htexp

    device_type = htexp._get_device_type()

    if device_type == htexp.synDeviceType.synDeviceGaudi:
        return "Gaudi1"
    elif device_type == htexp.synDeviceType.synDeviceGaudi2:
        return "Gaudi2"
    elif device_type == htexp.synDeviceType.synDeviceGaudi3:
        return "Gaudi3"
    else:
        raise ValueError(
            f"Unsupported device: the device type is {device_type}.")


def test_enc_dec_model(record_xml_attribute, record_property):
    try:
        config = yaml.safe_load(
            Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
        # Record JUnitXML test name
        platform = get_current_gaudi_platform()
        testname = (f'test_{Path(TEST_DATA_FILE).stem}_{platform}_'
                    f'tp{TP_SIZE}')
        record_xml_attribute("name", testname)

        mm_input = get_input()
        image = mm_input["image"]
        question = mm_input["question"]
        llm, prompt = launch_enc_dec_model(config, question)

        sampling_params = SamplingParams(temperature=0.0,
                                         max_tokens=100,
                                         stop_token_ids=None)

        # Replicate the same image+prompt pair num_prompts times so a
        # single generate() call exercises batching.
        num_prompts = config.get('num_prompts', 1)
        inputs = [{
            "prompt": prompt,
            "multi_modal_data": {
                "image": image
            },
        } for _ in range(num_prompts)]

        outputs = llm.generate(inputs, sampling_params=sampling_params)

        for o in outputs:
            generated_text = o.outputs[0].text
            assert generated_text, "Generated text is empty"
            print(generated_text)
        # Hard-exit with success so lingering engine threads cannot hang
        # the process after the test has passed.
        os._exit(0)

    except Exception as exc:
        # On failure, register an atexit hook that force-exits with status 1,
        # ensuring a non-zero exit code even if teardown would otherwise hang.
        atexit.register(fail_on_exit)
        raise exc
```
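The harness can also be run on its own, mirroring the environment the runner script exports. A sketch, assuming the working directory is .jenkins/vision/ (where the runner appears to execute) and that the model weights and data/cherry_blossom.jpg are in place:

```bash
cd .jenkins/vision

# Select a config and TP size via the same variables the runner script sets;
# VLLM_SKIP_WARMUP keeps this single-request smoke test fast on HPU.
TEST_DATA_FILE=configs/Meta-Llama-3.2-11B-Vision-Instruct.yaml \
TP_SIZE=1 \
VLLM_SKIP_WARMUP=true \
PT_HPU_ENABLE_LAZY_COLLECTIVES=true \
pytest -s test_enc_dec_model.py
```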