Commit 5d5c88d

move energy star and test examples

IlyasMoutawwakil committed Dec 9, 2024
1 parent 5931750 commit 5d5c88d
Showing 53 changed files with 178 additions and 151 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/test_api_cpu.yaml
@@ -47,8 +47,14 @@ jobs:
           pip install -e .[testing,timm,diffusers,codecarbon]
 
       - name: Run tests
         run: |
           pytest tests/test_api.py -s -k "api and cpu"
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           PUSH_REPO_ID: optimum-benchmark/cpu
+
+      - name: Run examples
+        run: |
+          pytest tests/test_examples.py -s -k "api and cpu"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
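For context on how these steps select tests: pytest's `-k` option matches a boolean expression against each collected test id, including parametrization ids. A minimal sketch with a hypothetical test file (illustrative names, not the repository's actual tests):

    # Hypothetical test file illustrating -k selection.
    import pytest

    @pytest.mark.parametrize("device", ["cpu", "cuda"])
    def test_api_launch(device):
        # Collected ids are test_api_launch[cpu] and test_api_launch[cuda];
        # `pytest -k "api and cpu"` runs only the first, since both substrings
        # must appear in the id for the expression to be true.
        assert device in ("cpu", "cuda")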
8 changes: 7 additions & 1 deletion .github/workflows/test_api_cuda.yaml
@@ -45,8 +45,14 @@ jobs:
           pip install -e .[testing,timm,diffusers,codecarbon]
 
       - name: Run tests
         run: |
           pytest tests/test_api.py -x -s -k "api and cuda"
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           PUSH_REPO_ID: optimum-benchmark/cuda
+
+      - name: Run examples
+        run: |
+          pytest tests/test_examples.py -x -s -k "api and cuda"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_ipex.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and ipex"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and ipex"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_llama_cpp.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "llama_cpp"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "llama_cpp"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_onnxruntime.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and onnxruntime"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_openvino.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and openvino"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and openvino"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_py_txi.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and py_txi"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_pytorch.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and pytorch"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and pytorch"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -48,3 +48,6 @@ jobs:
       - name: Run tests
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and onnxruntime"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cuda_py_txi.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cuda_pytorch.yaml
@@ -50,6 +50,9 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"
 
   run_cli_cuda_pytorch_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -50,6 +50,9 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
 
   cli_cuda_tensorrt_llm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
4 changes: 4 additions & 0 deletions .github/workflows/test_cli_cuda_torch_ort.yaml
@@ -51,6 +51,10 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"
+
+      - name: Run examples
+        run: |
+          pytest tests/test_examples.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"
 
   run_cli_cuda_torch_ort_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
4 changes: 4 additions & 0 deletions .github/workflows/test_cli_cuda_vllm.yaml
@@ -50,6 +50,10 @@ jobs:
         run: |
           FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
+
+      - name: Run examples (sequential)
+        run: |
+          FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
 
   run_cli_cuda_vllm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
@@ -20,13 +20,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
 
 jobs:
-  run_cli_energy_star_tests:
+  run_energy_star_tests:
     if: ${{
       (github.event_name == 'push') ||
       (github.event_name == 'workflow_dispatch') ||
-      contains( github.event.pull_request.labels.*.name, 'cli') ||
-      contains( github.event.pull_request.labels.*.name, 'energy_star') ||
-      contains( github.event.pull_request.labels.*.name, 'cli_energy_star')
+      contains( github.event.pull_request.labels.*.name, 'energy_star')
     }}
 
     runs-on:
14 files renamed without changes.
19 changes: 10 additions & 9 deletions examples/ipex_bert.yaml → examples/cpu_ipex_bert.yaml
@@ -6,24 +6,25 @@ defaults:
   - _base_
   - _self_
 
-name: ipex_bert
+name: cpu_ipex_bert
 
 launcher:
   numactl: true
   numactl_kwargs:
     cpunodebind: 0
     membind: 0
 
+backend:
+  device: cpu
+  export: true
+  no_weights: false
+  torch_dtype: bfloat16
+  model: google-bert/bert-base-uncased
+
 scenario:
-  latency: true
   memory: true
+  latency: true
 
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: false
-  export: true
-  torch_dtype: bfloat16
-  model: bert-base-uncased
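Assuming the project's documented CLI pattern, a config in examples/ is launched by passing its directory and (new) file name to the optimum-benchmark entry point; a sketch, with flags that may vary by version:

    optimum-benchmark --config-dir examples --config-name cpu_ipex_bert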
21 changes: 11 additions & 10 deletions examples/ipex_llama.yaml → examples/cpu_ipex_llama.yaml
@@ -6,32 +6,33 @@ defaults:
   - _base_
   - _self_
 
-name: ipex_llama
+name: cpu_ipex_llama
 
 launcher:
   numactl: true
   numactl_kwargs:
     cpunodebind: 0
     membind: 0
 
+backend:
+  device: cpu
+  export: true
+  no_weights: false
+  torch_dtype: bfloat16
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+
 scenario:
-  latency: true
   memory: true
+  latency: true
 
   warmup_runs: 10
   iterations: 10
   duration: 10
 
   input_shapes:
     batch_size: 1
     sequence_length: 256
 
   generate_kwargs:
     max_new_tokens: 32
     min_new_tokens: 32
-
-backend:
-  device: cpu
-  export: true
-  no_weights: false
-  torch_dtype: bfloat16
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
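Because these examples are Hydra configs, fields can also be overridden at launch time rather than edited in place. A hedged sketch assuming standard Hydra override syntax (the dotted paths mirror the YAML structure above):

    optimum-benchmark --config-dir examples --config-name cpu_ipex_llama \
      scenario.input_shapes.sequence_length=512 \
      scenario.generate_kwargs.max_new_tokens=64 \
      scenario.generate_kwargs.min_new_tokens=64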
@@ -1,26 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_
 
-name: llama_cpp_llama
+name: cpu_llama_cpp_embedding
 
 backend:
-  device: mps
-  model: nomic-ai/nomic-embed-text-v1.5-GGUF
+  device: cpu
+  task: feature-extraction
+  model: nomic-ai/nomic-embed-text-v1.5-GGUF
   filename: nomic-embed-text-v1.5.Q4_0.gguf
 
 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 256
+    vocab_size: 30000
+    type_vocab_size: 1
+    max_position_embeddings: 512
 
-  generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
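The extra input_shapes entries (vocab_size, type_vocab_size, max_position_embeddings) are presumably needed because a GGUF file ships no transformers config to read these bounds from, so the scenario must be told the valid ranges when it synthesizes random inputs. A hypothetical sketch of that kind of input generation (an assumption about the mechanism, not the library's actual code):

    import numpy as np

    # Sample token ids and segment ids within the declared ranges.
    batch_size, sequence_length = 1, 256
    vocab_size, type_vocab_size, max_position_embeddings = 30000, 1, 512

    input_ids = np.random.randint(0, vocab_size, size=(batch_size, sequence_length))
    token_type_ids = np.random.randint(0, type_vocab_size, size=(batch_size, sequence_length))
    assert sequence_length <= max_position_embeddings  # inputs must fit the model's positions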
@@ -1,25 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_
 
-name: llama_cpp_llama
+name: cpu_llama_cpp_text_generation
 
 backend:
-  device: mps
-  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
+  device: cpu
+  task: text-generation
+  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
   filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf
 
 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 256
+    vocab_size: 32000
 
   generate_kwargs:
     max_new_tokens: 100
     min_new_tokens: 100
@@ -6,10 +6,11 @@ defaults:
   - _base_
   - _self_
 
-name: onnxruntime_static_quant_vit
+name: cpu_onnxruntime_static_quant_vit
 
 backend:
   device: cpu
+  export: true
   no_weights: true
   model: google/vit-base-patch16-224
   quantization: true
@@ -10,7 +10,8 @@ name: onnxruntime_timm
 
 backend:
   device: cpu
-  model: timm/mobilenetv3_large_100.ra_in1k
+  export: true
+  model: timm/tiny_vit_21m_224.in1k
 
 scenario:
   memory: true
@@ -6,16 +6,16 @@ defaults:
   - _base_
   - _self_
 
-name: openvino_static_quant_bert
+name: openvino_static_quant
 
 backend:
   device: cpu
-  no_weights: true
-  model: bert-base-uncased
   quantization: true
   calibration: true
   reshape: true
+  no_weights: true
+  load_in_8bit: true
+  model: google-bert/bert-base-uncased
 
 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 16
@@ -9,11 +9,10 @@
 
 name: openvino_diffusion
 
 backend:
-  half: true
   device: cpu
-  model: stabilityai/stable-diffusion-2-1
   reshape: true
   export: true
+  half: true
+  model: stabilityai/stable-diffusion-2-1
 
 scenario:
   input_shapes:
5 changes: 3 additions & 2 deletions examples/pytorch_bert.py → examples/cuda_pytorch_bert.py
@@ -11,12 +11,13 @@
     print(f"Failed to get username from Hugging Face Hub: {e}")
     USERNAME = None
 
-BENCHMARK_NAME = "pytorch_bert"
+BENCHMARK_NAME = "cuda_pytorch_bert"
+MODEL = "google-bert/bert-base-uncased"
 
 
 def run_benchmark():
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
-    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model="bert-base-uncased")
+    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL)
     scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})
     benchmark_config = BenchmarkConfig(
         name=BENCHMARK_NAME,
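The rest of the file is elided in this view. Based on the library's public API, run_benchmark would typically finish by assembling the config and launching it; a hedged sketch of the continuation, not the file's verbatim tail:

    # Continuation sketch: assemble the config and launch the benchmark.
    benchmark_config = BenchmarkConfig(
        name=BENCHMARK_NAME,
        launcher=launcher_config,
        backend=backend_config,
        scenario=scenario_config,
    )
    benchmark_report = Benchmark.launch(benchmark_config)  # runs via the configured launcher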
File renamed without changes.
5 changes: 3 additions & 2 deletions examples/pytorch_llama.py → examples/cuda_pytorch_llama.py
@@ -11,7 +11,8 @@
     print(f"Failed to get username from Hugging Face Hub: {e}")
     USERNAME = None
 
-BENCHMARK_NAME = "pytorch-llama"
+BENCHMARK_NAME = "cuda_pytorch_llama"
+MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 WEIGHTS_CONFIGS = {
     "float16": {
@@ -40,10 +41,10 @@
 def run_benchmark(weight_config: str):
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
     backend_config = PyTorchConfig(
+        model=MODEL,
         device="cuda",
         device_ids="0",
         no_weights=True,
-        model="gpt2",
         **WEIGHTS_CONFIGS[weight_config],
     )
     scenario_config = InferenceConfig(
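Given the signature run_benchmark(weight_config: str) and the WEIGHTS_CONFIGS mapping above, a natural driver (inferred from the visible code, not shown in this diff) loops over the configured weight variants:

    # Run one benchmark per weights configuration.
    for weight_config in WEIGHTS_CONFIGS:
        run_benchmark(weight_config)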