Commit 5d5c88d

move energy star and test examples

IlyasMoutawwakil committed Dec 9, 2024
1 parent 5931750 commit 5d5c88d
Showing 53 changed files with 178 additions and 151 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/test_api_cpu.yaml
@@ -47,8 +47,14 @@ jobs:
           pip install -e .[testing,timm,diffusers,codecarbon]
 
       - name: Run tests
         run: |
           pytest tests/test_api.py -s -k "api and cpu"
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           PUSH_REPO_ID: optimum-benchmark/cpu
+
+      - name: Run examples
+        run: |
+          pytest tests/test_examples.py -s -k "api and cpu"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
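For context on how these steps select tests: pytest's `-k` option matches a boolean expression against each collected test id, including parametrization ids. A minimal sketch with a hypothetical test file (illustrative names, not the repository's actual tests):

    # Hypothetical test file illustrating -k selection.
    import pytest

    @pytest.mark.parametrize("device", ["cpu", "cuda"])
    def test_api_launch(device):
        # Collected ids are test_api_launch[cpu] and test_api_launch[cuda];
        # `pytest -k "api and cpu"` runs only the first, since both substrings
        # must appear in the id for the expression to be true.
        assert device in ("cpu", "cuda")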
8 changes: 7 additions & 1 deletion .github/workflows/test_api_cuda.yaml
@@ -45,8 +45,14 @@ jobs:
           pip install -e .[testing,timm,diffusers,codecarbon]
 
       - name: Run tests
         run: |
           pytest tests/test_api.py -x -s -k "api and cuda"
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           PUSH_REPO_ID: optimum-benchmark/cuda
+
+      - name: Run examples
+        run: |
+          pytest tests/test_examples.py -x -s -k "api and cuda"
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_ipex.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and ipex"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and ipex"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_llama_cpp.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "llama_cpp"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "llama_cpp"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_onnxruntime.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and onnxruntime"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_openvino.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and openvino"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and openvino"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_py_txi.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and py_txi"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cpu_pytorch.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -s -k "cli and cpu and pytorch"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -s -k "cli and cpu and pytorch"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -48,3 +48,6 @@ jobs:
       - name: Run tests
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and onnxruntime"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cuda_py_txi.yaml
@@ -49,3 +49,6 @@ jobs:
       - name: Run tests
         run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cuda_pytorch.yaml
@@ -50,6 +50,9 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"
 
   run_cli_cuda_pytorch_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
3 changes: 3 additions & 0 deletions .github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -50,6 +50,9 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
+
+      - name: Run examples
+        run: pytest tests/test_examples.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"
 
   cli_cuda_tensorrt_llm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
4 changes: 4 additions & 0 deletions .github/workflows/test_cli_cuda_torch_ort.yaml
@@ -51,6 +51,10 @@ jobs:
         run: |
           pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"
+
+      - name: Run examples
+        run: |
+          pytest tests/test_examples.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"
 
   run_cli_cuda_torch_ort_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
4 changes: 4 additions & 0 deletions .github/workflows/test_cli_cuda_vllm.yaml
@@ -50,6 +50,10 @@ jobs:
         run: |
           FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
+
+      - name: Run examples (sequential)
+        run: |
+          FORCE_SEQUENTIAL=1 pytest tests/test_examples.py -x -s -k "cli and cuda and vllm and not (tp or pp)"
 
   run_cli_cuda_vllm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
@@ -20,13 +20,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
 
 jobs:
-  run_cli_energy_star_tests:
+  run_energy_star_tests:
     if: ${{
       (github.event_name == 'push') ||
       (github.event_name == 'workflow_dispatch') ||
-      contains( github.event.pull_request.labels.*.name, 'cli') ||
-      contains( github.event.pull_request.labels.*.name, 'energy_star') ||
-      contains( github.event.pull_request.labels.*.name, 'cli_energy_star')
+      contains( github.event.pull_request.labels.*.name, 'energy_star')
     }}
 
     runs-on:
14 files renamed without changes.
19 changes: 10 additions & 9 deletions examples/ipex_bert.yaml → examples/cpu_ipex_bert.yaml
@@ -6,24 +6,25 @@ defaults:
   - _base_
   - _self_
 
-name: ipex_bert
+name: cpu_ipex_bert
 
 launcher:
   numactl: true
   numactl_kwargs:
     cpunodebind: 0
     membind: 0
 
+backend:
+  device: cpu
+  export: true
+  no_weights: false
+  torch_dtype: bfloat16
+  model: google-bert/bert-base-uncased
+
 scenario:
-  latency: true
   memory: true
+  latency: true
 
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: false
-  export: true
-  torch_dtype: bfloat16
-  model: bert-base-uncased
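Assuming the project's documented CLI pattern, a config in examples/ is launched by passing its directory and (new) file name to the optimum-benchmark entry point; a sketch, with flags that may vary by version:

    optimum-benchmark --config-dir examples --config-name cpu_ipex_bert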
21 changes: 11 additions & 10 deletions examples/ipex_llama.yaml → examples/cpu_ipex_llama.yaml
@@ -6,32 +6,33 @@ defaults:
   - _base_
   - _self_
 
-name: ipex_llama
+name: cpu_ipex_llama
 
 launcher:
   numactl: true
   numactl_kwargs:
     cpunodebind: 0
     membind: 0
 
+backend:
+  device: cpu
+  export: true
+  no_weights: false
+  torch_dtype: bfloat16
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+
 scenario:
-  latency: true
   memory: true
+  latency: true
 
   warmup_runs: 10
   iterations: 10
   duration: 10
 
   input_shapes:
     batch_size: 1
     sequence_length: 256
 
   generate_kwargs:
     max_new_tokens: 32
     min_new_tokens: 32
-
-backend:
-  device: cpu
-  export: true
-  no_weights: false
-  torch_dtype: bfloat16
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
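Because these examples are Hydra configs, fields can also be overridden at launch time rather than edited in place. A hedged sketch assuming standard Hydra override syntax (the dotted paths mirror the YAML structure above):

    optimum-benchmark --config-dir examples --config-name cpu_ipex_llama \
      scenario.input_shapes.sequence_length=512 \
      scenario.generate_kwargs.max_new_tokens=64 \
      scenario.generate_kwargs.min_new_tokens=64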
@@ -1,26 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_
 
-name: llama_cpp_llama
+name: cpu_llama_cpp_embedding
 
 backend:
-  device: mps
-  model: nomic-ai/nomic-embed-text-v1.5-GGUF
+  device: cpu
+  task: feature-extraction
+  model: nomic-ai/nomic-embed-text-v1.5-GGUF
   filename: nomic-embed-text-v1.5.Q4_0.gguf
 
 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 256
+    vocab_size: 30000
+    type_vocab_size: 1
+    max_position_embeddings: 512
 
-  generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
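The extra input_shapes entries (vocab_size, type_vocab_size, max_position_embeddings) are presumably needed because a GGUF file ships no transformers config to read these bounds from, so the scenario must be told the valid ranges when it synthesizes random inputs. A hypothetical sketch of that kind of input generation (an assumption about the mechanism, not the library's actual code):

    import numpy as np

    # Sample token ids and segment ids within the declared ranges.
    batch_size, sequence_length = 1, 256
    vocab_size, type_vocab_size, max_position_embeddings = 30000, 1, 512

    input_ids = np.random.randint(0, vocab_size, size=(batch_size, sequence_length))
    token_type_ids = np.random.randint(0, type_vocab_size, size=(batch_size, sequence_length))
    assert sequence_length <= max_position_embeddings  # inputs must fit the model's positions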
@@ -1,25 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_
 
-name: llama_cpp_llama
+name: cpu_llama_cpp_text_generation
 
 backend:
-  device: mps
-  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
+  device: cpu
+  task: text-generation
+  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
   filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf
 
 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 256
+    vocab_size: 32000
 
   generate_kwargs:
     max_new_tokens: 100
     min_new_tokens: 100
@@ -6,10 +6,11 @@ defaults:
   - _base_
   - _self_
 
-name: onnxruntime_static_quant_vit
+name: cpu_onnxruntime_static_quant_vit
 
 backend:
   device: cpu
+  export: true
   no_weights: true
   model: google/vit-base-patch16-224
   quantization: true
@@ -10,7 +10,8 @@ name: onnxruntime_timm
 
 backend:
   device: cpu
-  model: timm/mobilenetv3_large_100.ra_in1k
+  export: true
+  model: timm/tiny_vit_21m_224.in1k
 
 scenario:
   memory: true
@@ -6,16 +6,16 @@ defaults:
   - _base_
   - _self_
 
-name: openvino_static_quant_bert
+name: openvino_static_quant
 
 backend:
   device: cpu
-  no_weights: true
-  model: bert-base-uncased
   quantization: true
   calibration: true
   reshape: true
+  no_weights: true
+  load_in_8bit: true
+  model: google-bert/bert-base-uncased
 
 scenario:
   input_shapes:
     batch_size: 1
     sequence_length: 16
@@ -9,11 +9,10 @@
 
 name: openvino_diffusion
 
 backend:
-  half: true
   device: cpu
-  model: stabilityai/stable-diffusion-2-1
   reshape: true
   export: true
+  half: true
+  model: stabilityai/stable-diffusion-2-1
 
 scenario:
   input_shapes:
5 changes: 3 additions & 2 deletions examples/pytorch_bert.py → examples/cuda_pytorch_bert.py
@@ -11,12 +11,13 @@
     print(f"Failed to get username from Hugging Face Hub: {e}")
     USERNAME = None
 
-BENCHMARK_NAME = "pytorch_bert"
+BENCHMARK_NAME = "cuda_pytorch_bert"
+MODEL = "google-bert/bert-base-uncased"
 
 
 def run_benchmark():
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
-    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model="bert-base-uncased")
+    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL)
     scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})
     benchmark_config = BenchmarkConfig(
         name=BENCHMARK_NAME,
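The rest of the file is elided in this view. Based on the library's public API, run_benchmark would typically finish by assembling the config and launching it; a hedged sketch of the continuation, not the file's verbatim tail:

    # Continuation sketch: assemble the config and launch the benchmark.
    benchmark_config = BenchmarkConfig(
        name=BENCHMARK_NAME,
        launcher=launcher_config,
        backend=backend_config,
        scenario=scenario_config,
    )
    benchmark_report = Benchmark.launch(benchmark_config)  # runs via the configured launcher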
File renamed without changes.
5 changes: 3 additions & 2 deletions examples/pytorch_llama.py → examples/cuda_pytorch_llama.py
@@ -11,7 +11,8 @@
     print(f"Failed to get username from Hugging Face Hub: {e}")
     USERNAME = None
 
-BENCHMARK_NAME = "pytorch-llama"
+BENCHMARK_NAME = "cuda_pytorch_llama"
+MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 
 WEIGHTS_CONFIGS = {
     "float16": {
@@ -40,10 +41,10 @@
 def run_benchmark(weight_config: str):
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
     backend_config = PyTorchConfig(
+        model=MODEL,
         device="cuda",
         device_ids="0",
         no_weights=True,
-        model="gpt2",
         **WEIGHTS_CONFIGS[weight_config],
     )
     scenario_config = InferenceConfig(
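Given the signature run_benchmark(weight_config: str) and the WEIGHTS_CONFIGS mapping above, a natural driver (inferred from the visible code, not shown in this diff) loops over the configured weight variants:

    # Run one benchmark per weights configuration.
    for weight_config in WEIGHTS_CONFIGS:
        run_benchmark(weight_config)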