Add basic CI checks for enc dec models #741

Merged · 16 commits · Feb 10, 2025
1 change: 1 addition & 0 deletions .jenkins/requirements-test-hpu.txt
@@ -1,3 +1,4 @@
lm_eval
pytest
tokenizers<0.20.2
transformers<=4.46.3
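
A minimal sketch of installing these pinned test dependencies by hand, assuming the working directory is the vllm-fork checkout root (the CI presumably does the equivalent):

# Install the HPU test dependencies used by the Jenkins jobs.
pip install -r .jenkins/requirements-test-hpu.txt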
14 changes: 14 additions & 0 deletions .jenkins/test_config.yaml
@@ -69,6 +69,20 @@ stages:
      - name: test_long_context
        flavor: g2
        command: VLLM_SKIP_WARMUP=true pytest -v tests/lora/test_long_context_hpu.py::test_quality
  - name: tests_multimodal
    steps:
      - name: multimodal_small_g3_tp1
        flavor: g3
        command: cd .jenkins/vision && bash run-tests.sh -c configs/models-small.txt -t 1
      - name: multimodal_small_g3_tp2
        flavor: g3.s
        command: cd .jenkins/vision && bash run-tests.sh -c configs/models-small.txt -t 2
      - name: multimodal_small_g3_tp1_mss
        flavor: g3
        command: cd .jenkins/vision && bash run-tests.sh -c configs/models-mss.txt -t 1
      - name: multimodal_small_g3_tp2_mss
        flavor: g3.s
        command: cd .jenkins/vision && bash run-tests.sh -c configs/models-mss.txt -t 2
  - name: tests_int4_quantization
    steps:
      - name: test_awq
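Each step's command can also be reproduced outside Jenkins; a sketch of the tp1 step, assuming an HPU host with the model weights available at the /mnt/weka paths referenced by the configs:

# Reproduce the multimodal_small_g3_tp1 step locally, from the checkout root.
cd .jenkins/vision && bash run-tests.sh -c configs/models-small.txt -t 1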
6 changes: 6 additions & 0 deletions .jenkins/vision/configs/Meta-Llama-3.2-11B-Vision-Instruct-mss.yaml
@@ -0,0 +1,6 @@
model_name: "/mnt/weka/data/pytorch/llama3.2/Meta-Llama-3.2-11B-Vision-Instruct"
dtype: "bfloat16"
max_model_len: 1024
max_num_seqs: 32
num_prompts: 4
num_scheduler_steps: 10
5 changes: 5 additions & 0 deletions .jenkins/vision/configs/Meta-Llama-3.2-11B-Vision-Instruct.yaml
@@ -0,0 +1,5 @@
model_name: "/mnt/weka/data/pytorch/llama3.2/Meta-Llama-3.2-11B-Vision-Instruct"
dtype: "bfloat16"
max_model_len: 1024
max_num_seqs: 32
num_prompts: 4
1 change: 1 addition & 0 deletions .jenkins/vision/configs/models-mss.txt
@@ -0,0 +1 @@
Meta-Llama-3.2-11B-Vision-Instruct-mss.yaml
1 change: 1 addition & 0 deletions .jenkins/vision/configs/models-small.txt
@@ -0,0 +1 @@
Meta-Llama-3.2-11B-Vision-Instruct.yaml
Binary file added .jenkins/vision/data/cherry_blossom.jpg
71 changes: 71 additions & 0 deletions .jenkins/vision/run-tests.sh
@@ -0,0 +1,71 @@
#!/bin/bash

usage() {
    echo
    echo "Runs simple request check on multimodal models using vllm"
    echo
    echo "usage: ${0} <options>"
    echo
    echo "  -c  - path to the test data config (e.g. configs/models-small.txt)"
    echo "  -t  - tensor parallel size"
    echo
}

SUCCESS=0

while getopts "c:t:" OPT; do
    case ${OPT} in
        c )
            CONFIG="$OPTARG"
            ;;
        t )
            TP_SIZE="$OPTARG"
            ;;
        \? )
            usage
            exit 1
            ;;
    esac
done

# Parse list of configs.
IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "$CONFIG"

for MODEL_CONFIG in "${MODEL_CONFIGS[@]}"
do
    LOCAL_SUCCESS=0

    echo "=== RUNNING MODEL: $MODEL_CONFIG WITH TP SIZE: $TP_SIZE ==="

    export TEST_DATA_FILE=$PWD/configs/${MODEL_CONFIG}
    export TP_SIZE=$TP_SIZE
    export PT_HPU_ENABLE_LAZY_COLLECTIVES=true
    export VLLM_SKIP_WARMUP=true
    export TQDM_BAR_FORMAT="{desc}: {percentage:3.0f}% {bar:10} | {n_fmt}/{total_fmt} [{elapsed}<{remaining}]"
    RANDOM_SUFFIX=$(tr -dc A-Za-z0-9 </dev/urandom | head -c 4; echo)
    JUNIT_FAMILY=""
    JUNIT_XML=""
    if [[ -n "$TEST_RESULTS_DIR" ]]; then
        LOG_DIR=$TEST_RESULTS_DIR
        LOG_FILENAME="test_${MODEL_CONFIG}_${RANDOM_SUFFIX}.xml"
        LOG_PATH="${LOG_DIR}/${LOG_FILENAME}"
        JUNIT_FAMILY="-o junit_family=xunit1"
        JUNIT_XML="--junitxml=${LOG_PATH}"
    fi
    # Deliberately unquoted: the optional JUnit args must word-split into
    # separate pytest tokens when set, and vanish entirely when empty.
    pytest -s test_enc_dec_model.py $JUNIT_FAMILY $JUNIT_XML || LOCAL_SUCCESS=$?

    if [[ $LOCAL_SUCCESS == 0 ]]; then
        echo "=== PASSED MODEL: ${MODEL_CONFIG} ==="
    else
        echo "=== FAILED MODEL: ${MODEL_CONFIG} ==="
    fi

    SUCCESS=$((SUCCESS + LOCAL_SUCCESS))

done

if [ "${SUCCESS}" -eq "0" ]; then
exit 0
else
exit 1
fi
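
Example invocations, mirroring the commands wired into test_config.yaml above; TEST_RESULTS_DIR is optional and, when set, is where the JUnit XML reports are written:

cd .jenkins/vision
bash run-tests.sh -c configs/models-small.txt -t 1
TEST_RESULTS_DIR=/tmp/results bash run-tests.sh -c configs/models-mss.txt -t 2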
120 changes: 120 additions & 0 deletions .jenkins/vision/test_enc_dec_model.py
@@ -0,0 +1,120 @@
import atexit
import os
from pathlib import Path

import yaml
from PIL import Image
from transformers import AutoTokenizer

from vllm import LLM, SamplingParams

TEST_DATA_FILE = os.environ.get(
    "TEST_DATA_FILE",
    ".jenkins/vision/configs/Meta-Llama-3.2-11B-Vision-Instruct.yaml")

TP_SIZE = int(os.environ.get("TP_SIZE", 1))


def fail_on_exit():
    # Registered via atexit on test failure so the process exit code is
    # nonzero once pytest finishes reporting.
    os._exit(1)


def launch_enc_dec_model(config, question):
    model_name = config.get('model_name')
    dtype = config.get('dtype', 'bfloat16')
    max_num_seqs = config.get('max_num_seqs', 128)
    max_model_len = config.get('max_model_len', 4096)
    tensor_parallel_size = TP_SIZE
    num_scheduler_steps = config.get('num_scheduler_steps', 1)
    llm = LLM(
        model=model_name,
        dtype=dtype,
        tensor_parallel_size=tensor_parallel_size,
        num_scheduler_steps=num_scheduler_steps,
        max_model_len=max_model_len,
        max_num_seqs=max_num_seqs,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    messages = [{
        "role": "user",
        "content": [
            {"type": "image"},
            {"type": "text", "text": f"{question}"},
        ],
    }]
    prompt = tokenizer.apply_chat_template(messages,
                                           add_generation_prompt=True,
                                           tokenize=False)
    return llm, prompt


def get_input():
    image = Image.open("data/cherry_blossom.jpg").convert("RGB")
    img_question = "What is the content of this image?"

    return {
        "image": image,
        "question": img_question,
    }


def get_current_gaudi_platform():
    # Inspired by: https://github.com/HabanaAI/Model-References/blob/a87c21f14f13b70ffc77617b9e80d1ec989a3442/PyTorch/computer_vision/classification/torchvision/utils.py#L274
    import habana_frameworks.torch.utils.experimental as htexp

    device_type = htexp._get_device_type()

    if device_type == htexp.synDeviceType.synDeviceGaudi:
        return "Gaudi1"
    elif device_type == htexp.synDeviceType.synDeviceGaudi2:
        return "Gaudi2"
    elif device_type == htexp.synDeviceType.synDeviceGaudi3:
        return "Gaudi3"
    else:
        raise ValueError(
            f"Unsupported device: the device type is {device_type}.")


def test_enc_dec_model(record_xml_attribute, record_property):
    try:
        config = yaml.safe_load(
            Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
        # Record JUnitXML test name
        platform = get_current_gaudi_platform()
        testname = (f'test_{Path(TEST_DATA_FILE).stem}_{platform}_'
                    f'tp{TP_SIZE}')
        record_xml_attribute("name", testname)

        mm_input = get_input()
        image = mm_input["image"]
        question = mm_input["question"]
        llm, prompt = launch_enc_dec_model(config, question)

        sampling_params = SamplingParams(temperature=0.0,
                                         max_tokens=100,
                                         stop_token_ids=None)

        num_prompts = config.get('num_prompts', 1)
        inputs = [{
            "prompt": prompt,
            "multi_modal_data": {
                "image": image
            },
        } for _ in range(num_prompts)]

        outputs = llm.generate(inputs, sampling_params=sampling_params)

        for o in outputs:
            generated_text = o.outputs[0].text
            assert generated_text, "Generated text is empty"
            print(generated_text)
        # Hard-exit with success, skipping any further interpreter teardown.
        os._exit(0)

    except Exception as exc:
        # Make sure the process exits nonzero after pytest reports the failure.
        atexit.register(fail_on_exit)
        raise exc
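
The test can also be driven directly for a single config; a sketch, assuming the working directory is .jenkins/vision so the relative data/cherry_blossom.jpg path resolves:

TEST_DATA_FILE=$PWD/configs/Meta-Llama-3.2-11B-Vision-Instruct.yaml \
TP_SIZE=1 VLLM_SKIP_WARMUP=true \
pytest -s test_enc_dec_model.py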