From 8029a711d880be393f91c43d85ee2ddab838a2d5 Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Mon, 19 Feb 2024 08:25:33 +0100
Subject: [PATCH] [feature][refactor] Better Metrics and Trackers (#124)
---
.github/workflows/test_api_cuda.yaml | 15 +-
.github/workflows/test_api_rocm.yaml | 6 +-
.../workflows/test_cli_cuda_onnxruntime.yaml | 10 +-
.github/workflows/test_cli_cuda_pytorch.yaml | 15 +-
.../workflows/test_cli_cuda_torch_ort.yaml | 10 +-
.../workflows/test_cli_rocm_onnxruntime.yaml | 10 +-
.github/workflows/test_cli_rocm_pytorch.yaml | 6 +-
.github/workflows/test_cli_tensorrt_llm.yaml | 14 +-
.../test_cli_tensorrt_onnxruntime.yaml | 9 +-
.gitignore | 1 +
Makefile | 61 ++-
README.md | 115 +++---
docker/cpu.dockerfile | 1 -
docker/cuda.dockerfile | 4 +-
docker/rocm-ort.dockerfile | 5 +-
docker/tensorrt.dockerfile | 2 +-
examples/api_launch.py | 21 ++
examples/pytorch_bert.yaml | 6 +-
optimum_benchmark/backends/base.py | 24 +-
optimum_benchmark/backends/config.py | 54 +--
optimum_benchmark/backends/diffusers_utils.py | 2 +-
.../backends/neural_compressor/backend.py | 32 +-
.../backends/neural_compressor/config.py | 10 +-
.../backends/onnxruntime/backend.py | 60 ++-
.../backends/onnxruntime/config.py | 21 +-
.../backends/onnxruntime/utils.py | 8 +-
.../backends/openvino/backend.py | 34 +-
optimum_benchmark/backends/openvino/config.py | 2 +-
optimum_benchmark/backends/openvino/utils.py | 6 +-
optimum_benchmark/backends/peft_utils.py | 11 +-
optimum_benchmark/backends/pytorch/backend.py | 61 ++-
optimum_benchmark/backends/pytorch/config.py | 15 +-
.../backends/tensorrt_llm/backend.py | 10 +-
.../backends/tensorrt_llm/config.py | 5 +-
.../text_generation_inference/backend.py | 37 +-
optimum_benchmark/backends/timm_utils.py | 2 +-
.../backends/torch_ort/backend.py | 24 +-
.../backends/torch_ort/config.py | 2 +-
.../backends/transformers_utils.py | 20 +-
optimum_benchmark/benchmarks/base.py | 2 +-
optimum_benchmark/benchmarks/config.py | 5 +-
.../benchmarks/inference/benchmark.py | 218 ++++++-----
.../benchmarks/inference/callback.py | 25 --
.../benchmarks/inference/config.py | 19 +-
.../benchmarks/inference/report.py | 353 ------------------
optimum_benchmark/benchmarks/report.py | 130 ++++++-
.../benchmarks/training/benchmark.py | 90 +++--
.../benchmarks/training/callback.py | 43 ---
.../benchmarks/training/config.py | 9 +-
.../benchmarks/training/report.py | 169 ---------
optimum_benchmark/benchmarks/utils.py | 1 -
optimum_benchmark/cli.py | 47 +--
optimum_benchmark/env_utils.py | 175 ---------
optimum_benchmark/experiment.py | 161 ++++----
.../generators/input_generator.py | 20 +-
.../generators/task_generator.py | 90 +----
optimum_benchmark/import_utils.py | 57 ++-
optimum_benchmark/launchers/base.py | 5 +-
optimum_benchmark/launchers/config.py | 4 +-
.../launchers/inline/launcher.py | 17 +-
.../launchers/isolation_utils.py | 75 ++--
.../launchers/process/launcher.py | 65 ++--
.../launchers/torchrun/config.py | 2 +-
.../launchers/torchrun/launcher.py | 85 ++---
optimum_benchmark/logging_utils.py | 27 +-
optimum_benchmark/system_utils.py | 219 +++++++++++
optimum_benchmark/task_utils.py | 52 +--
optimum_benchmark/trackers/energy.py | 115 ++++--
optimum_benchmark/trackers/latency.py | 252 ++++++++++---
optimum_benchmark/trackers/memory.py | 338 +++++++++++------
pyproject.toml | 13 +
setup.py | 27 +-
tests/configs/_base_.yaml | 13 +-
tests/configs/_bert_sweep_.yaml | 3 +-
..._lm_naive_mp_.yaml => _gpt_naive_mp_.yaml} | 6 +-
.../{_lm_peft_.yaml => _gpt_peft_.yaml} | 0
.../{_lm_sweep_.yaml => _gpt_sweep_.yaml} | 1 -
...nference_neural_compressor_gpt_sweep.yaml} | 4 +-
... cpu_inference_onnxruntime_gpt_sweep.yaml} | 4 +-
... => cpu_inference_openvino_gpt_sweep.yaml} | 4 +-
...l => cpu_inference_pytorch_gpt_sweep.yaml} | 4 +-
...ml => cpu_training_pytorch_gpt_sweep.yaml} | 4 +-
...cuda_inference_onnxruntime_gpt_sweep.yaml} | 4 +-
... cuda_inference_pytorch_gpt_naive_mp.yaml} | 4 +-
... => cuda_inference_pytorch_gpt_sweep.yaml} | 4 +-
...> cuda_training_pytorch_gpt_naive_mp.yaml} | 4 +-
...ml => cuda_training_pytorch_gpt_peft.yaml} | 4 +-
...l => cuda_training_pytorch_gpt_sweep.yaml} | 4 +-
... => cuda_training_torch_ort_gpt_peft.yaml} | 4 +-
...=> cuda_training_torch_ort_gpt_sweep.yaml} | 4 +-
...rocm_inference_onnxruntime_gpt_sweep.yaml} | 4 +-
tests/test_api.py | 138 +++----
tests/test_cli.py | 11 +-
93 files changed, 1863 insertions(+), 2026 deletions(-)
create mode 100644 examples/api_launch.py
delete mode 100644 optimum_benchmark/benchmarks/inference/callback.py
delete mode 100644 optimum_benchmark/benchmarks/inference/report.py
delete mode 100644 optimum_benchmark/benchmarks/training/callback.py
delete mode 100644 optimum_benchmark/benchmarks/training/report.py
delete mode 100644 optimum_benchmark/benchmarks/utils.py
delete mode 100644 optimum_benchmark/env_utils.py
create mode 100644 optimum_benchmark/system_utils.py
rename tests/configs/{_lm_naive_mp_.yaml => _gpt_naive_mp_.yaml} (100%)
rename tests/configs/{_lm_peft_.yaml => _gpt_peft_.yaml} (100%)
rename tests/configs/{_lm_sweep_.yaml => _gpt_sweep_.yaml} (81%)
rename tests/configs/{cpu_inference_neural_compressor_lm_sweep.yaml => cpu_inference_neural_compressor_gpt_sweep.yaml} (70%)
rename tests/configs/{cpu_inference_onnxruntime_lm_sweep.yaml => cpu_inference_onnxruntime_gpt_sweep.yaml} (71%)
rename tests/configs/{cpu_inference_openvino_lm_sweep.yaml => cpu_inference_openvino_gpt_sweep.yaml} (71%)
rename tests/configs/{cpu_inference_pytorch_lm_sweep.yaml => cpu_inference_pytorch_gpt_sweep.yaml} (72%)
rename tests/configs/{cpu_training_pytorch_lm_sweep.yaml => cpu_training_pytorch_gpt_sweep.yaml} (72%)
rename tests/configs/{cuda_inference_onnxruntime_lm_sweep.yaml => cuda_inference_onnxruntime_gpt_sweep.yaml} (71%)
rename tests/configs/{cuda_inference_pytorch_lm_naive_mp.yaml => cuda_inference_pytorch_gpt_naive_mp.yaml} (70%)
rename tests/configs/{cuda_inference_pytorch_lm_sweep.yaml => cuda_inference_pytorch_gpt_sweep.yaml} (72%)
rename tests/configs/{cuda_training_pytorch_lm_naive_mp.yaml => cuda_training_pytorch_gpt_naive_mp.yaml} (70%)
rename tests/configs/{cuda_training_pytorch_lm_peft.yaml => cuda_training_pytorch_gpt_peft.yaml} (69%)
rename tests/configs/{cuda_training_pytorch_lm_sweep.yaml => cuda_training_pytorch_gpt_sweep.yaml} (69%)
rename tests/configs/{cuda_training_torch_ort_lm_peft.yaml => cuda_training_torch_ort_gpt_peft.yaml} (69%)
rename tests/configs/{cuda_training_torch_ort_lm_sweep.yaml => cuda_training_torch_ort_gpt_sweep.yaml} (69%)
rename tests/configs/{rocm_inference_onnxruntime_lm_sweep.yaml => rocm_inference_onnxruntime_gpt_sweep.yaml} (71%)
diff --git a/.github/workflows/test_api_cuda.yaml b/.github/workflows/test_api_cuda.yaml
index fe08f29d..28d9b435 100644
--- a/.github/workflows/test_api_cuda.yaml
+++ b/.github/workflows/test_api_cuda.yaml
@@ -18,11 +18,11 @@ jobs:
matrix:
image:
[
- { torch_cuda: cu121, cuda_version: 12.1.1 },
- { torch_cuda: cu118, cuda_version: 11.8.0 },
+ { torch_cuda: cu118, torch_pre_release: 0, cuda_version: 11.8.0 },
+ { torch_cuda: cu121, torch_pre_release: 1, cuda_version: 12.1.1 },
]
- runs-on: hf-dgx-01
+ runs-on: nvidia-gpu
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -37,17 +37,20 @@ jobs:
--tag opt-bench-cuda:${{ matrix.image.cuda_version }}
.
+ - name: Get GPUs with most free memory
+ id: get_devices
+ run: |
+ echo "::set-output name=devices::$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')"
+
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_CUDA="1"
- --volume $HOME/.cache/huggingface:/home/user/.cache/huggingface
+ --gpus '"device=${{ steps.get_devices.outputs.devices }}"'
--volume $(pwd):/workspace/optimum-benchmark
--workdir /workspace/optimum-benchmark
- --gpus '"device=0,1"'
--entrypoint /bin/bash
opt-bench-cuda:${{ matrix.image.cuda_version }}
-c "pip install -e .[testing,timm,diffusers] && pytest -k 'api and cuda' -x"
diff --git a/.github/workflows/test_api_rocm.yaml b/.github/workflows/test_api_rocm.yaml
index 31328eb3..7e2bf63a 100644
--- a/.github/workflows/test_api_rocm.yaml
+++ b/.github/workflows/test_api_rocm.yaml
@@ -19,10 +19,10 @@ jobs:
image:
[
{ torch_rocm: rocm5.6, torch_pre_release: 0, rocm_version: 5.6.1 },
- { torch_rocm: rocm5.7, torch_pre_release: 1, rocm_version: 5.7 },
+ { torch_rocm: rocm5.7, torch_pre_release: 1, rocm_version: 5.7.1 },
]
- runs-on: hf-amd-mi210-dev
+ runs-on: amd-gpu
steps:
- name: Checkout code
uses: actions/checkout@v3
@@ -41,11 +41,9 @@ jobs:
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_ROCM="1"
- --volume $HOME/.cache/huggingface:/home/user/.cache/huggingface
--volume $(pwd):/workspace/optimum-benchmark
--workdir /workspace/optimum-benchmark
--device /dev/kfd
diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml
index 0b03608e..adb31be3 100644
--- a/.github/workflows/test_cli_cuda_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -13,7 +13,7 @@ concurrency:
jobs:
build_image_and_run_cli_cuda_onnxruntime_tests:
- runs-on: hf-dgx-01
+ runs-on: nvidia-gpu
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -28,16 +28,20 @@ jobs:
--tag opt-bench-cuda:11.8.0
.
+ - name: Get GPUs with most free memory
+ id: get_devices
+ run: |
+ echo "::set-output name=devices::$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')"
+
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_CUDA="1"
--entrypoint /bin/bash
+ --gpus '"device=${{ steps.get_devices.outputs.devices }}"'
--volume $(pwd):/workspace/optimum-benchmark
--workdir /workspace/optimum-benchmark
- --gpus '"device=0,1"'
opt-bench-cuda:11.8.0
-c "pip install -e .[testing,onnxruntime-gpu,diffusers,timm] && pytest -k 'cli and cuda and onnxruntime' -x"
diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml
index 1b3fd99f..204722db 100644
--- a/.github/workflows/test_cli_cuda_pytorch.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch.yaml
@@ -18,11 +18,11 @@ jobs:
matrix:
image:
[
- { torch_cuda: cu121, cuda_version: 12.1.1 },
- { torch_cuda: cu118, cuda_version: 11.8.0 },
+ { torch_cuda: cu118, torch_pre_release: 0, cuda_version: 11.8.0 },
+ { torch_cuda: cu121, torch_pre_release: 1, cuda_version: 12.1.1 },
]
- runs-on: hf-dgx-01
+ runs-on: nvidia-gpu
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -37,17 +37,20 @@ jobs:
--tag opt-bench-cuda:${{ matrix.image.cuda_version }}
.
+ - name: Get GPUs with most free memory
+ id: get_devices
+ run: |
+ echo "::set-output name=devices::$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')"
+
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_CUDA="1"
- --volume $HOME/.cache/huggingface:/home/user/.cache/huggingface
+ --gpus '"device=${{ steps.get_devices.outputs.devices }}"'
--volume $(pwd):/workspace/optimum-benchmark
--workdir /workspace/optimum-benchmark
- --gpus '"device=0,1"'
--entrypoint /bin/bash
opt-bench-cuda:${{ matrix.image.cuda_version }}
-c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest -k 'cli and cuda and pytorch' -x"
diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml
index 71bfd33e..680f3f0f 100644
--- a/.github/workflows/test_cli_cuda_torch_ort.yaml
+++ b/.github/workflows/test_cli_cuda_torch_ort.yaml
@@ -13,7 +13,7 @@ concurrency:
jobs:
build_image_and_run_cli_cuda_torch_ort_tests:
- runs-on: hf-dgx-01
+ runs-on: nvidia-gpu
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -28,16 +28,20 @@ jobs:
--tag opt-bench-cuda:11.8.0
.
+ - name: Get GPUs with most free memory
+ id: get_devices
+ run: |
+ echo "::set-output name=devices::$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')"
+
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_CUDA="1"
--entrypoint /bin/bash
+ --gpus '"device=${{ steps.get_devices.outputs.devices }}"'
--volume $(pwd):/workspace/optimum-benchmark
--workdir /workspace/optimum-benchmark
- --gpus '"device=0,1"'
opt-bench-cuda:11.8.0
-c "pip install -e .[testing,torch-ort,peft] && python -m torch_ort.configure && pytest -k 'cli and cuda and torch_ort' -x"
diff --git a/.github/workflows/test_cli_rocm_onnxruntime.yaml b/.github/workflows/test_cli_rocm_onnxruntime.yaml
index fcd0f53d..8be58292 100644
--- a/.github/workflows/test_cli_rocm_onnxruntime.yaml
+++ b/.github/workflows/test_cli_rocm_onnxruntime.yaml
@@ -13,7 +13,7 @@ concurrency:
jobs:
build_image_and_run_cli_rocm_onnxruntime_tests:
- runs-on: hf-amd-mi210-dev
+ runs-on: amd-gpu
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -21,7 +21,7 @@ jobs:
- name: Check if image exists
id: check_image
run: |
- if [[ "$(docker images -q opt-bench-rocm-ort:5.7 2> /dev/null)" == "" ]]; then
+ if [[ "$(docker images -q opt-bench-rocm-ort:latest 2> /dev/null)" == "" ]]; then
echo "::set-output name=exists::false"
else
echo "::set-output name=exists::true"
@@ -33,14 +33,12 @@ jobs:
--file docker/rocm-ort.dockerfile
--build-arg USER_ID=$(id -u)
--build-arg GROUP_ID=$(id -g)
- --build-arg ROCM_VERSION=5.7
- --tag opt-bench-rocm-ort:5.7
+ --tag opt-bench-rocm-ort:latest
.
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_ROCM="1"
@@ -50,5 +48,5 @@ jobs:
--device /dev/dri/renderD128
--device /dev/dri/renderD129
--entrypoint /bin/bash
- opt-bench-rocm-ort:5.7
+ opt-bench-rocm-ort:latest
-c "pip install -e .[testing,timm,diffusers] && pytest -k 'cli and rocm and onnxruntime' -x"
diff --git a/.github/workflows/test_cli_rocm_pytorch.yaml b/.github/workflows/test_cli_rocm_pytorch.yaml
index 11c9e77a..c4ae7139 100644
--- a/.github/workflows/test_cli_rocm_pytorch.yaml
+++ b/.github/workflows/test_cli_rocm_pytorch.yaml
@@ -19,10 +19,10 @@ jobs:
image:
[
{ torch_rocm: rocm5.6, torch_pre_release: 0, rocm_version: 5.6.1 },
- { torch_rocm: rocm5.7, torch_pre_release: 1, rocm_version: 5.7 },
+ { torch_rocm: rocm5.7, torch_pre_release: 1, rocm_version: 5.7.1 },
]
- runs-on: hf-amd-mi210-dev
+ runs-on: [amd-gpu]
steps:
- name: Checkout code
uses: actions/checkout@v3
@@ -41,11 +41,9 @@ jobs:
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_ROCM="1"
- --volume $HOME/.cache/huggingface:/home/user/.cache/huggingface
--volume $(pwd):/workspace/optimum-benchmark
--workdir /workspace/optimum-benchmark
--device /dev/kfd
diff --git a/.github/workflows/test_cli_tensorrt_llm.yaml b/.github/workflows/test_cli_tensorrt_llm.yaml
index 0169fca5..40438055 100644
--- a/.github/workflows/test_cli_tensorrt_llm.yaml
+++ b/.github/workflows/test_cli_tensorrt_llm.yaml
@@ -13,7 +13,7 @@ concurrency:
jobs:
pull_image_and_run_cli_tensorrt_llm_tests:
- runs-on: hf-dgx-01
+ runs-on: nvidia-gpu
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -26,18 +26,20 @@ jobs:
--tag opt-bench-tensorrt-llm:latest
.
+ - name: Get GPUs with most free memory
+ id: get_devices
+ run: |
+ echo "::set-output name=devices::$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')"
+
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_CUDA="1"
- --env USER_ID=$(id -u)
- --env GROUP_ID=$(id -g)
+ --gpus '"device=${{ steps.get_devices.outputs.devices }}"'
--volume $(pwd):/workspace/optimum-benchmark
--workdir /workspace/optimum-benchmark
- --gpus '"device=0,1"'
--entrypoint /bin/bash
opt-bench-tensorrt-llm:latest
- -c "pip install -e .[testing] && pytest -k 'cli and tensorrt_llm' -x"
+ -c "pip install -e .[testing] && pip uninstall -y nvidia-ml-py && pytest -k 'cli and tensorrt_llm' -x"
diff --git a/.github/workflows/test_cli_tensorrt_onnxruntime.yaml b/.github/workflows/test_cli_tensorrt_onnxruntime.yaml
index 92f425e7..a98bfc15 100644
--- a/.github/workflows/test_cli_tensorrt_onnxruntime.yaml
+++ b/.github/workflows/test_cli_tensorrt_onnxruntime.yaml
@@ -13,7 +13,7 @@ concurrency:
jobs:
build_image_and_run_cli_tensorrt_onnxruntime_tests:
- runs-on: hf-dgx-01
+ runs-on: nvidia-gpu
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -23,15 +23,12 @@ jobs:
--file docker/tensorrt.dockerfile
--build-arg USER_ID=$(id -u)
--build-arg GROUP_ID=$(id -g)
- --build-arg TENSORRT_VERSION=22.12
- --build-arg TORCH_CUDA=cu118
- --tag opt-bench-tensorrt:22.12
+ --tag opt-bench-tensorrt:latest
.
- name: Run tests
run: docker run
--rm
- --net host
--pid host
--shm-size 64G
--env USE_CUDA="1"
@@ -39,5 +36,5 @@ jobs:
--workdir /workspace/optimum-benchmark
--gpus '"device=0,1"'
--entrypoint /bin/bash
- opt-bench-tensorrt:22.12
+ opt-bench-tensorrt:latest
-c "pip install -e .[testing,onnxruntime-gpu,diffusers,timm] && pytest -k 'cli and tensorrt and onnxruntime' -x"
diff --git a/.gitignore b/.gitignore
index 12c19326..a8e86c83 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,3 +171,4 @@ actions-runner/
experiments/
examples/
.engine/
+amdsmi
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 55e44e1e..0253c183 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,9 @@
# List of targets that are not associated with files
-.PHONY: quality style install install_dev_cpu install_dev_gpu
+.PHONY: quality style install \
+ build_docker_cpu, build_docker_cuda, build_docker_rocm, \
+ test_cli_cpu_pytorch, test_cli_rocm_pytorch, \
+ test_cli_cpu_neural_compressor, test_cli_cpu_onnxruntime, test_cli_cpu_openvino, \
+ test_api_cpu, test_api_cuda, test_api_rocm, test_api_misc
quality:
ruff check .
@@ -13,13 +17,13 @@ install:
pip install -e .
build_docker_cpu:
- docker build -f docker/cuda.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) -t opt-bench-cpu:latest .
+ docker build -f docker/cpu.dockerfile --build-arg USER_ID=$(shell id -u) --build-arg GROUP_ID=$(shell id -g) -t opt-bench-cpu:latest .
build_docker_cuda:
- docker build -f docker/cuda.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) --build-arg TORCH_CUDA=cu118 --build-arg CUDA_VERSION=11.8.0 -t opt-bench-cuda:11.8.0 .
+ docker build -f docker/cuda.dockerfile --build-arg USER_ID=$(shell id -u) --build-arg GROUP_ID=$(shell id -g) -t opt-bench-cuda:latest .
build_docker_rocm:
- docker build -f docker/rocm.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) --build-arg TORCH_ROCM=rocm5.6 --build-arg ROCM_VERSION=5.6.1 -t opt-bench-rocm:5.6.1 .
+ docker build -f docker/rocm.dockerfile --build-arg USER_ID=$(shell id -u) --build-arg GROUP_ID=$(shell id -g) -t opt-bench-rocm:latest .
test_cli_cpu_neural_compressor:
docker run \
@@ -27,23 +31,23 @@ test_cli_cpu_neural_compressor:
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--workdir /workspace \
- opt-bench-cpu:latest -c "pip install -e .[testing,neural-compressor] && pytest tests/ -k 'cli and cpu and neural_compressor' -x"
+ opt-bench-cpu:latest -c "pip install -e .[testing,neural-compressor,diffusers,timm] && pytest tests/ -k 'cli and cpu and neural_compressor' -x"
-test_cli_cpu_openvino:
+test_cli_cpu_onnxruntime:
docker run \
--rm \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--workdir /workspace \
- opt-bench-cpu:latest -c "pip install -e .[testing,openvino,diffusers] && pytest tests/ -k 'cli and cpu and openvino' -x"
+ opt-bench-cpu:latest -c "pip install -e .[testing,onnxruntime,diffusers,timm] && pytest tests/ -k 'cli and cpu and onnxruntime' -x"
-test_cli_cpu_onnxruntime:
+test_cli_cpu_openvino:
docker run \
--rm \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--workdir /workspace \
- opt-bench-cpu:latest -c "pip install -e .[testing,onnxruntime,diffusers,timm] && pytest tests/ -k 'cli and cpu and onnxruntime' -x"
+ opt-bench-cpu:latest -c "pip install -e .[testing,openvino,diffusers,timm] && pytest tests/ -k 'cli and cpu and openvino' -x"
test_cli_cpu_pytorch:
docker run \
@@ -53,13 +57,34 @@ test_cli_cpu_pytorch:
--workdir /workspace \
opt-bench-cpu:latest -c "pip install -e .[testing,diffusers,timm] && pytest tests/ -k 'cli and cpu and pytorch' -x"
+test_cli_rocm_pytorch:
+ docker run \
+ --rm \
+ --device=/dev/kfd \
+ --device /dev/dri/renderD128 \
+ --device /dev/dri/renderD129 \
+ --group-add video \
+ --entrypoint /bin/bash \
+ --volume $(PWD):/workspace \
+ --workdir /workspace \
+ opt-bench-rocm:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"
+
+test_cli_cuda_pytorch:
+ docker run \
+ --rm \
+ --gpus '"device=0,1"' \
+ --entrypoint /bin/bash \
+ --volume $(PWD):/workspace \
+ --workdir /workspace \
+ opt-bench-cuda:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"
+
test_api_cpu:
docker run \
--rm \
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--workdir /workspace \
- opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers,codecarbon] && pytest tests/ -k 'api and cpu' -x"
+ opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cpu' -x"
test_api_cuda:
docker run \
@@ -68,7 +93,19 @@ test_api_cuda:
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--workdir /workspace \
- opt-bench-cuda:11.8.0 -c "pip install -e .[testing,timm,diffusers,codecarbon] && pytest tests/ -k 'api and cuda' -x"
+ opt-bench-cuda:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"
+
+test_api_rocm:
+ docker run \
+ --rm \
+ --device=/dev/kfd \
+ --device /dev/dri/renderD128 \
+ --device /dev/dri/renderD129 \
+ --group-add video \
+ --entrypoint /bin/bash \
+ --volume $(PWD):/workspace \
+ --workdir /workspace \
+ opt-bench-rocm:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"
test_api_misc:
docker run \
@@ -76,4 +113,4 @@ test_api_misc:
--entrypoint /bin/bash \
--volume $(PWD):/workspace \
--workdir /workspace \
- opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers,codecarbon] && pytest tests/ -k 'api and not (cpu or cuda or rocm or tensorrt)' -x"
+ opt-bench-cpu:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and not (cpu or cuda or rocm or tensorrt)' -x"
diff --git a/README.md b/README.md
index e338b888..49889327 100644
--- a/README.md
+++ b/README.md
@@ -1,37 +1,42 @@
-
-
-
+
+All benchmarks are wrong, some will cost you less than the others.
Optimum-Benchmark 🏋️
-Optimum-Benchmark is a unified [multi-backend & multi-device](#backends--devices-) utility for benchmarking [Transformers](https://github.com/huggingface/transformers), [Diffusers](https://github.com/huggingface/diffusers), [PEFT](https://github.com/huggingface/peft), [TIMM](https://github.com/huggingface/pytorch-image-models) and [Optimum](https://github.com/huggingface/optimum) flavors, along with all their supported [optimizations & quantization schemes](#backend-features-), for [inference & training](#benchmark-features-%EF%B8%8F), in [distributed & non-distributed settings](#backend-features-).
+Optimum-Benchmark is a unified [multi-backend & multi-device](#backends--devices-) utility for benchmarking [Transformers](https://github.com/huggingface/transformers), [Diffusers](https://github.com/huggingface/diffusers), [PEFT](https://github.com/huggingface/peft), [TIMM](https://github.com/huggingface/pytorch-image-models) and [Optimum](https://github.com/huggingface/optimum) flavors, along with all their supported [optimizations & quantization schemes](#backend-features-), for [inference & training](#benchmark-features-%EF%B8%8F), in [distributed & non-distributed settings](#backend-features-), in the most correct and scalable way possible (no need to even download model weights).
-## Motivation 🤔
+*News* 📰
+- PYPI release soon.
+- Added a simple Python API to run benchmarks with all isolation and tracking features supported by the CLI.
+*Motivations* 🤔
- HF hardware partners wanting to know how their hardware performs compared to another hardware on the same models.
- HF ecosystem users wanting to know how their chosen model performs in terms of latency, throughput, memory usage, energy consumption, etc compared to another model.
- Experimenting with hardware & backend specific optimizations & quantization schemes that can be applied to models and improve their computational/memory/energy efficiency.
+*Notes* 📝
+- If you were using `optimum-benchmark` before and want to keep using the old CLI only version, you can still do so by installing from this branch [`0.0.1`](https://github.com/huggingface/optimum-benchmark/tree/0.0.1).
+
## Current status 📈
### API
-
[![CPU](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_cpu.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_cpu.yaml)
[![CUDA](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_cuda.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_cuda.yaml)
[![ROCM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_rocm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_rocm.yaml)
+[![MISC](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_misc.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_misc.yaml)
### CLI
-
-[![CPU Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_pytorch.yaml)
-[![CPU OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_onnxruntime.yaml)
-[![CPU Intel Neural Compressor Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_neural_compressor.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_neural_compressor.yaml)
-[![CPU OpenVINO Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_openvino.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_openvino.yaml)
-[![CUDA Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_pytorch.yaml)
-[![CUDA OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_onnxruntime_inference.yaml)
-[![CUDA Torch-ORT Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_torch_ort_training.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_torch_ort_training.yaml)
-[![TensorRT OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_onnxruntime_inference.yaml)
-[![TensorRT-LLM Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_llm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_llm.yaml)
-[![ROCm Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_pytorch.yaml)
-[![ROCm OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_onnxruntime_inference.yaml)
+[![CPU Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_pytorch.yaml)
+[![CPU OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_onnxruntime.yaml)
+[![CPU Intel Neural Compressor Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_neural_compressor.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_neural_compressor.yaml)
+[![CPU OpenVINO Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_openvino.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_openvino.yaml)
+[![CUDA Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_pytorch.yaml)
+[![CUDA OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_onnxruntime.yaml)
+[![CUDA Torch-ORT Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_torch_ort.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_torch_ort.yaml)
+[![TensorRT OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_tensorrt_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_tensorrt_onnxruntime.yaml)
+[![TensorRT-LLM Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_tensorrt_llm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_tensorrt_llm.yaml)
+[![ROCm Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_pytorch.yaml)
+[![ROCm OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_onnxruntime.yaml)
+[![MISC Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_misc.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_misc.yaml)
## Quickstart 🚀
@@ -64,46 +69,36 @@ Depending on the backends you want to use, you might need to install some extra
### Running benchmarks from Python API 🧪
-You can run benchmarks from the Python API, using the `launch` function from the `optimum_benchmark.experiment` module. Here's an example of how to run a benchmark using the `pytorch` backend, `process` launcher and `inference` benchmark.
+You can run benchmarks from the Python API, using the `launch` function. Here's an example of how to run a benchmark using the `pytorch` backend, `torchrun` launcher and `inference` benchmark.
```python
from optimum_benchmark.logging_utils import setup_logging
from optimum_benchmark.experiment import launch, ExperimentConfig
from optimum_benchmark.backends.pytorch.config import PyTorchConfig
-from optimum_benchmark.launchers.process.config import ProcessConfig
+from optimum_benchmark.launchers.torchrun.config import TorchrunConfig
from optimum_benchmark.benchmarks.inference.config import InferenceConfig
-
if __name__ == "__main__":
setup_logging(level="INFO")
- benchmark_config = InferenceConfig(latency=False, memory=True, energy=True)
- launcher_config = ProcessConfig()
- backend_config = PyTorchConfig(
- device="cuda",
- no_weights=True,
- device_ids="0,1",
- device_map="auto",
- model="IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm",
- )
+ launcher_config = TorchrunConfig(nproc_per_node=2)
+ benchmark_config = InferenceConfig(latency=True, memory=True)
+ backend_config = PyTorchConfig(model="gpt2", device="cuda", device_ids="0,1", no_weights=True)
experiment_config = ExperimentConfig(
- experiment_name="python-api-launch-experiment",
+ experiment_name="api-launch",
benchmark=benchmark_config,
launcher=launcher_config,
backend=backend_config,
)
benchmark_report = launch(experiment_config)
- benchmark_report.log_all()
- # or
- print(benchmark_report.to_dict())
- # or
- benchmark_report.push_to_hub("IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm")
+ experiment_config.push_to_hub("IlyasMoutawwakil/benchmarks") # pushes experiment_config.json to the hub
+ benchmark_report.push_to_hub("IlyasMoutawwakil/benchmarks") # pushes benchmark_report.json to the hub
```
-Yep, it's that simple! Check the supported backends, launchers and benchmarks in the [features](#features-) section.
+Yep, it's that simple! Check the supported backends, launchers and benchmarks matrix in the [features](#features-) section.
### Running benchmarks from CLI 🏃‍♂️
-You can run a benchmark using the command line by specifying the configuration directory and the configuration name. Both arguments are mandatory for [`hydra`](https://hydra.cc/). `--config-dir` is the directory where the configuration files are stored and `--config-name` is the name of the configuration file without its `.yaml` extension.
+You can also run a benchmark using the command line by specifying the configuration directory and the configuration name. Both arguments are mandatory for [`hydra`](https://hydra.cc/). `--config-dir` is the directory where the configuration files are stored and `--config-name` is the name of the configuration file without its `.yaml` extension.
```bash
optimum-benchmark --config-dir examples/ --config-name pytorch_bert
@@ -111,11 +106,11 @@ optimum-benchmark --config-dir examples/ --config-name pytorch_bert
This will run the benchmark using the configuration in [`examples/pytorch_bert.yaml`](examples/pytorch_bert.yaml) and store the results in `runs/pytorch_bert`.
-The result files are `benchmark_report.json`, the program's logs `experiment.log` and the configuration that's been used `experiment_config.yaml`, including backend, launcher, benchmark and environment configurations.
+The result files are `benchmark_report.json`, the program's logs `cli.log` and the configuration that's been used `experiment_config.json`, including backend, launcher, benchmark and environment configurations.
The directory for storing these results can be changed by setting `hydra.run.dir` (and/or `hydra.sweep.dir` in case of a multirun) in the command line or in the config file.
-### Configuration overrides 🎛️
+#### Configuration overrides 🎛️
It's easy to override the default behavior of a benchmark from the command line.
@@ -123,40 +118,17 @@ It's easy to override the default behavior of a benchmark from the command line.
optimum-benchmark --config-dir examples/ --config-name pytorch_bert backend.model=gpt2 backend.device=cuda
```
-### Configuration multirun sweeps 🧹
+#### Configuration multirun sweeps 🧹
You can easily run configuration sweeps using the `-m` or `--multirun` option. By default, configurations will be executed serially but other kinds of executions are supported with hydra's launcher plugins : `hydra/launcher=submitit`, `hydra/launcher=rays`, etc.
-Note that the hydra launcher `hydra/launcher` is different than our own `launcher`, specifically `hydra/launcher` can only be used in `--multirun` mode, and will only handle the inter-run behavior.
```bash
optimum-benchmark --config-dir examples --config-name pytorch_bert -m backend.device=cpu,cuda
```
-Also, for integer parameters like `batch_size`, one can specify a range of values to sweep over:
-
-```bash
-optimum-benchmark --config-dir examples --config-name pytorch_bert -m device=cpu,cuda benchmark.input_shapes.batch_size='range(1,10,step=2)'
-```
-
### Configurations structure 📁
-You can create custom configuration files following the [examples here](examples).
-You can also use `hydra`'s [composition](https://hydra.cc/docs/0.11/tutorial/composition/) with a base configuration ([`examples/pytorch_bert.yaml`](examples/pytorch_bert.yaml) for example) and override/define parameters.
-
-To create a configuration that uses a `wav2vec2` model and `onnxruntime` backend, it's as easy as:
-
-```yaml
-defaults:
- - pytorch_bert
- - _self_
- - override backend: onnxruntime
-
-experiment_name: onnxruntime_wav2vec2
-model: bookbot/distil-wav2vec2-adult-child-cls-37m
-device: cpu
-```
-
-Other than the [examples](examples), you can also check [tests](tests/configs/).
+You can create custom configuration files following the [examples here](https://github.com/IlyasMoutawwakil/optimum-benchmark-examples).
## Features 🎨
@@ -171,9 +143,9 @@ Everything else is optional or inferred at runtime, but can be configured to you
### Launchers 🚀
+- [x] Distributed inference/training (`launcher=torchrun`)
- [x] Process isolation between consecutive runs (`launcher=process`)
- [x] Assert GPU devices (NVIDIA & AMD) isolation (`launcher.device_isolation=true`)
-- [x] Distributed inference/training (`launcher=torchrun`, `launcher.n_proc_per_node=2`)
### Backends & Devices 📱
@@ -191,19 +163,18 @@ Everything else is optional or inferred at runtime, but can be configured to you
### Benchmarking 🏋️
- [x] Memory tracking (`benchmark.memory=true`)
-- [x] Latency and throughput tracking of forward pass (default)
+- [x] Energy and efficiency tracking (`benchmark.energy=true`)
+- [x] Latency and throughput tracking (`benchmark.latency=true`)
- [x] Warm up runs before inference (`benchmark.warmup_runs=20`)
- [x] Warm up steps during training (`benchmark.warmup_steps=20`)
-- [x] Energy and carbon emissions tracking (`benchmark.energy=true`)
- [x] Inputs shapes control (e.g. `benchmark.input_shapes.sequence_length=128`)
- [x] Dataset shapes control (e.g. `benchmark.dataset_shapes.dataset_size=1000`)
-- [x] Latancy and throughput tracking of generation pass (auto-enabled for generative models)
-- [x] Prefill latency and Decoding throughput deduced from generation and forward pass (auto-enabled for generative models)
-- [x] Forward and Generation pass control (e.g. for an LLM `benchmark.generate_kwargs.max_new_tokens=100`, for a diffusion model `benchmark.forward_kwargs.num_images_per_prompt=4`)
+- [x] Prefill latency and Decoding throughput deduced from Generate and Forward pass (auto-enabled for text generation models)
+- [x] Forward, Call and Generate pass kwargs control (e.g. for an LLM `benchmark.generate_kwargs.max_new_tokens=100`, for a diffusion model `benchmark.call_kwargs.num_images_per_prompt=4`)
### Backend features 🧰
-- [x] Random weights initialization (`backend.no_weights=true` for fast model instantiation without downloading weights)
+- [x] "No weights" to benchmark models without downloading their weights (`backend.no_weights=true`)
- [x] Onnxruntime Quantization and AutoQuantization (`backend.quantization=true` or `backend.auto_quantization=avx2`, etc)
- [x] Onnxruntime Calibration for Static Quantization (`backend.quantization_config.is_static=true`, etc)
- [x] Onnxruntime Optimization and AutoOptimization (`backend.optimization=true` or `backend.auto_optimization=O4`, etc)
diff --git a/docker/cpu.dockerfile b/docker/cpu.dockerfile
index 371a89c8..f15db72f 100644
--- a/docker/cpu.dockerfile
+++ b/docker/cpu.dockerfile
@@ -1,6 +1,5 @@
FROM ubuntu:latest
-
# Ignore interactive questions during `docker build`
ENV DEBIAN_FRONTEND noninteractive
diff --git a/docker/cuda.dockerfile b/docker/cuda.dockerfile
index a2270ffa..664895d1 100644
--- a/docker/cuda.dockerfile
+++ b/docker/cuda.dockerfile
@@ -13,12 +13,12 @@
# limitations under the License.
ARG CUDNN_VERSION=8
-ARG CUDA_VERSION=12.1.1
+ARG CUDA_VERSION=11.8.0
ARG UBUNTU_VERSION=22.04
FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}
-ARG TORCH_CUDA=cu121
+ARG TORCH_CUDA=cu118
ARG TORCH_PRE_RELEASE=0
# Ignore interactive questions during `docker build`
diff --git a/docker/rocm-ort.dockerfile b/docker/rocm-ort.dockerfile
index 1dafd137..5309962f 100644
--- a/docker/rocm-ort.dockerfile
+++ b/docker/rocm-ort.dockerfile
@@ -13,10 +13,11 @@
# limitations under the License.
ARG ROCM_VERSION=5.7
-ARG UBUNTU_VERSION=22.04
ARG PYTHON_VERSION=3.10
+ARG UBUNTU_VERSION=22.04
+ARG PYTORCH_VERSION=2.0.1
-FROM rocm/pytorch:rocm${ROCM_VERSION}_ubuntu${UBUNTU_VERSION}_py${PYTHON_VERSION}_pytorch_2.0.1
+FROM rocm/pytorch:rocm${ROCM_VERSION}_ubuntu${UBUNTU_VERSION}_py${PYTHON_VERSION}_pytorch_${PYTORCH_VERSION}
# Ignore interactive questions during `docker build`
ENV DEBIAN_FRONTEND noninteractive
diff --git a/docker/tensorrt.dockerfile b/docker/tensorrt.dockerfile
index 1e2b8603..35c84a63 100644
--- a/docker/tensorrt.dockerfile
+++ b/docker/tensorrt.dockerfile
@@ -16,7 +16,7 @@ ARG TENSORRT_VERSION=23.09
FROM nvcr.io/nvidia/tensorrt:${TENSORRT_VERSION}-py3
-ARG TORCH_CUDA=cu121
+ARG TORCH_CUDA=cu118
# Ignore interactive questions during `docker build`
ENV DEBIAN_FRONTEND noninteractive
diff --git a/examples/api_launch.py b/examples/api_launch.py
new file mode 100644
index 00000000..987ec8c9
--- /dev/null
+++ b/examples/api_launch.py
@@ -0,0 +1,21 @@
+from optimum_benchmark.backends.pytorch.config import PyTorchConfig
+from optimum_benchmark.benchmarks.inference.config import InferenceConfig
+from optimum_benchmark.experiment import ExperimentConfig, launch
+from optimum_benchmark.launchers.torchrun.config import TorchrunConfig
+from optimum_benchmark.logging_utils import setup_logging
+
+
+if __name__ == "__main__":
+ setup_logging(level="INFO")
+ launcher_config = TorchrunConfig(nproc_per_node=2)
+ benchmark_config = InferenceConfig(latency=True, memory=True)
+ backend_config = PyTorchConfig(model="gpt2", device="cuda", device_ids="0,1", no_weights=True)
+ experiment_config = ExperimentConfig(
+ experiment_name="api-launch",
+ benchmark=benchmark_config,
+ launcher=launcher_config,
+ backend=backend_config,
+ )
+ benchmark_report = launch(experiment_config)
+ experiment_config.push_to_hub("IlyasMoutawwakil/benchmarks")
+ benchmark_report.push_to_hub("IlyasMoutawwakil/benchmarks")
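
The new `examples/api_launch.py` above ends by pushing `experiment_config.json` and `benchmark_report.json` to the Hub. As a hedged follow-up sketch, the pushed report can be pulled back and inspected with `huggingface_hub`; the repo id comes from the example, while the file name and default repo type are assumptions based on the README comments rather than anything defined in this patch:

```python
# Hypothetical retrieval of a pushed report; assumes the default (model) repo type
# and the "benchmark_report.json" file name mentioned in the README comments.
import json

from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="IlyasMoutawwakil/benchmarks", filename="benchmark_report.json")
with open(path) as f:
    report = json.load(f)

print(list(report.keys()))
```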
diff --git a/examples/pytorch_bert.yaml b/examples/pytorch_bert.yaml
index 5a36147c..e3b08e87 100644
--- a/examples/pytorch_bert.yaml
+++ b/examples/pytorch_bert.yaml
@@ -9,8 +9,12 @@ defaults:
experiment_name: pytorch_bert
+benchmark:
+ latency: true
+ memory: true
+
backend:
- device: cpu
+ device: cuda
device_ids: 0
model: bert-base-uncased
diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py
index cf0f5087..2be47a11 100644
--- a/optimum_benchmark/backends/base.py
+++ b/optimum_benchmark/backends/base.py
@@ -1,26 +1,25 @@
import gc
import random
from abc import ABC
-from logging import getLogger
from collections import OrderedDict
-from typing import Optional, ClassVar, Generic, Dict, Any
+from logging import getLogger
+from typing import Any, ClassVar, Dict, Generic, Optional
-from .config import BackendConfigT
-from ..task_utils import get_automodel_class_for_task
+import numpy as np
+from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel, TrainerState
+from ..task_utils import get_automodel_class_for_task
+from .config import BackendConfigT
from .diffusers_utils import extract_diffusers_shapes_from_config, get_diffusers_pretrained_config
-from .timm_utils import extract_timm_shapes_from_config, get_timm_pretrained_config, get_timm_pre_processor
+from .timm_utils import extract_timm_shapes_from_config, get_timm_pre_processor, get_timm_pretrained_config
from .transformers_utils import (
+ PretrainedProcessor,
extract_transformers_shapes_from_artifacts,
get_transformers_generation_config,
- get_transformers_pretrained_config,
get_transformers_pre_processor,
- PretrainedProcessor,
+ get_transformers_pretrained_config,
)
-import numpy as np
-from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel, TrainerState
-
LOGGER = getLogger("backend")
@@ -62,10 +61,7 @@ def __init__(self, config: BackendConfigT):
self.model_type = self.pretrained_config.model_type
self.automodel_class = get_automodel_class_for_task(
- model_type=self.model_type,
- library=self.config.library,
- task=self.config.task,
- framework="pt",
+ model_type=self.model_type, library=self.config.library, task=self.config.task, framework="pt"
)
def seed(self) -> None:
diff --git a/optimum_benchmark/backends/config.py b/optimum_benchmark/backends/config.py
index a4919c15..e8c9c231 100644
--- a/optimum_benchmark/backends/config.py
+++ b/optimum_benchmark/backends/config.py
@@ -1,24 +1,17 @@
import os
from abc import ABC
-from logging import getLogger
from dataclasses import dataclass, field
-from typing import Optional, TypeVar, Dict, Any
+from logging import getLogger
+from typing import Any, Dict, Optional, TypeVar
-from ..import_utils import is_psutil_available
-from ..env_utils import get_cuda_device_ids, is_nvidia_system, is_rocm_system
-from ..task_utils import infer_library_from_model_name_or_path, infer_task_from_model_name_or_path
+from psutil import cpu_count
-if is_psutil_available():
- from psutil import cpu_count
+from ..system_utils import get_gpu_device_ids, is_nvidia_system, is_rocm_system
+from ..task_utils import infer_library_from_model_name_or_path, infer_task_from_model_name_or_path
LOGGER = getLogger("backend")
-HUB_KWARGS = {
- "revision": "main",
- "force_download": False,
- "local_files_only": False,
- "trust_remote_code": False,
-}
+HUB_KWARGS = {"revision": "main", "force_download": False, "local_files_only": False, "trust_remote_code": False}
@dataclass
@@ -31,10 +24,10 @@ class BackendConfig(ABC):
model: Optional[str] = None
device: Optional[str] = None
- # yes we use a string here instead of a list
- # it's easier to pass in a yaml or from cli
- # also it's consistent with CUDA_VISIBLE_DEVICES
device_ids: Optional[str] = None
+ # yes we use a string here instead of a list
+ # because it's easier to pass in a yaml or from cli
+ # and it's consistent with GPU environment variables
task: Optional[str] = None
library: Optional[str] = None
@@ -48,36 +41,49 @@ def __post_init__(self):
if self.model is None:
raise ValueError("`model` must be specified.")
+ if self.task is None:
+ self.task = infer_task_from_model_name_or_path(self.model)
+
if self.device is None:
self.device = "cuda" if is_nvidia_system() or is_rocm_system() else "cpu"
+ LOGGER.warning(f"`device` is not specified, defaulting to {self.device} based on system configuration.")
+
+ if self.device not in ["cuda", "cpu", "mps", "xla"]:
+ raise ValueError(f"`device` must be either `cuda`, `cpu`, `mps` or `xla`, but got {self.device}")
if ":" in self.device:
- # using device index
+ # support pytorch device index notation
self.device = self.device.split(":")[0]
self.device_ids = self.device.split(":")[1]
if self.device == "cuda":
if self.device_ids is None:
- self.device_ids = get_cuda_device_ids()
+ self.device_ids = get_gpu_device_ids()
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = self.device_ids
- # TODO: add rocm specific environment variables ?
- if self.device not in ["cuda", "cpu", "mps", "xla"]:
- raise ValueError(f"`device` must be either `cuda`, `cpu`, `mps` or `xla`, but got {self.device}")
-
- if self.task is None:
- self.task = infer_task_from_model_name_or_path(self.model)
+ if is_rocm_system():
+ # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html
+ os.environ["GPU_DEVICE_ORDINAL"] = self.device_ids
+ os.environ["HIP_VISIBLE_DEVICES"] = self.device_ids
+ os.environ["ROCR_VISIBLE_DEVICES"] = self.device_ids
if self.library is None:
self.library = infer_library_from_model_name_or_path(self.model)
+ if self.library not in ["transformers", "diffusers", "timm"]:
+ raise ValueError(f"`library` must be either `transformers`, `diffusers` or `timm`, but got {self.library}")
+
if self.inter_op_num_threads is not None:
+ if not isinstance(self.inter_op_num_threads, int):
+ raise ValueError(f"`inter_op_num_threads` must be an integer, but got {self.inter_op_num_threads}")
if self.inter_op_num_threads == -1:
self.inter_op_num_threads = cpu_count()
if self.intra_op_num_threads is not None:
+ if not isinstance(self.intra_op_num_threads, int):
+ raise ValueError(f"`intra_op_num_threads` must be an integer, but got {self.intra_op_num_threads}")
if self.intra_op_num_threads == -1:
self.intra_op_num_threads = cpu_count()
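
The reworked `BackendConfig.__post_init__` above now infers the task and library, validates the device name, resolves `device_ids` (falling back to `get_gpu_device_ids()`), and exports the visibility variables for both CUDA (`CUDA_VISIBLE_DEVICES`) and ROCm (`HIP_VISIBLE_DEVICES`, `ROCR_VISIBLE_DEVICES`, `GPU_DEVICE_ORDINAL`). A condensed, standalone sketch of that resolution flow, with a hypothetical `detect_gpu_ids` helper standing in for `get_gpu_device_ids`:

```python
# Condensed sketch of the device/device_ids resolution added in this patch;
# `detect_gpu_ids` is a stand-in for optimum_benchmark.system_utils.get_gpu_device_ids.
import os
from typing import Optional, Tuple


def detect_gpu_ids() -> str:
    return "0,1"  # hypothetical fallback: assume GPUs 0 and 1 are available


def resolve_device(device: str, device_ids: Optional[str]) -> Tuple[str, Optional[str]]:
    if ":" in device:  # support the "cuda:1" index notation
        device, device_ids = device.split(":", 1)
    if device not in ("cuda", "cpu", "mps", "xla"):
        raise ValueError(f"`device` must be cuda, cpu, mps or xla, got {device}")
    if device == "cuda":
        device_ids = device_ids or detect_gpu_ids()
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = device_ids
    return device, device_ids


print(resolve_device("cuda:1", None))  # ('cuda', '1'), with the CUDA env vars set
```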
diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py
index 705436d3..5b0f56ce 100644
--- a/optimum_benchmark/backends/diffusers_utils.py
+++ b/optimum_benchmark/backends/diffusers_utils.py
@@ -5,7 +5,7 @@
from ..import_utils import is_diffusers_available
if is_diffusers_available():
- import diffusers
+ import diffusers # type: ignore
def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]:
diff --git a/optimum_benchmark/backends/neural_compressor/backend.py b/optimum_benchmark/backends/neural_compressor/backend.py
index dd2a7a82..cb70fdfc 100644
--- a/optimum_benchmark/backends/neural_compressor/backend.py
+++ b/optimum_benchmark/backends/neural_compressor/backend.py
@@ -1,22 +1,22 @@
-import os
import gc
-from typing import Any, Dict
+import os
from logging import getLogger
from tempfile import TemporaryDirectory
-
-from ...generators.dataset_generator import DatasetGenerator
-from ..transformers_utils import randomize_weights
-from .utils import TASKS_TO_INCMODELS
-from .config import INCConfig
-from ..base import Backend
+from typing import Any, Dict
import torch
from hydra.utils import get_class
-from transformers.utils import ModelOutput
+from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion
+from optimum.intel.neural_compressor.quantization import INCQuantizer
from transformers.modeling_utils import no_init_weights
+from transformers.utils import ModelOutput
from transformers.utils.logging import set_verbosity_error
-from optimum.intel.neural_compressor.quantization import INCQuantizer
-from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion, TuningCriterion
+
+from ...generators.dataset_generator import DatasetGenerator
+from ..base import Backend
+from ..transformers_utils import randomize_weights
+from .config import INCConfig
+from .utils import TASKS_TO_INCMODELS
# disable transformers logging
set_verbosity_error()
@@ -128,15 +128,9 @@ def quantize_automodel(self) -> None:
if self.config.calibration:
LOGGER.info("\t+ Generating calibration dataset")
- dataset_shapes = {
- "dataset_size": 1,
- "sequence_length": 1,
- **self.model_shapes,
- }
+ dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
calibration_dataset = DatasetGenerator(
- task=self.config.task,
- dataset_shapes=dataset_shapes,
- model_shapes=self.model_shapes,
+ task=self.config.task, dataset_shapes=dataset_shapes, model_shapes=self.model_shapes
)()
columns_to_be_removed = list(set(calibration_dataset.column_names) - set(quantizer._signature_columns))
calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
diff --git a/optimum_benchmark/backends/neural_compressor/config.py b/optimum_benchmark/backends/neural_compressor/config.py
index 22becfe6..09623e47 100644
--- a/optimum_benchmark/backends/neural_compressor/config.py
+++ b/optimum_benchmark/backends/neural_compressor/config.py
@@ -1,17 +1,13 @@
-from typing import Any, Dict, Optional
from dataclasses import dataclass, field
+from typing import Any, Dict, Optional
from omegaconf import OmegaConf
-from ..config import BackendConfig
from ...import_utils import neural_compressor_version
+from ..config import BackendConfig
# https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L490
-ACCURACY_CRITERION_CONFIG = {
- "higher_is_better": True,
- "criterion": "relative",
- "tolerable_loss": 0.01,
-}
+ACCURACY_CRITERION_CONFIG = {"higher_is_better": True, "criterion": "relative", "tolerable_loss": 0.01}
# https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L593
TUNING_CRITERION_CONFIG = {
diff --git a/optimum_benchmark/backends/onnxruntime/backend.py b/optimum_benchmark/backends/onnxruntime/backend.py
index 07d5d860..0d2fc857 100644
--- a/optimum_benchmark/backends/onnxruntime/backend.py
+++ b/optimum_benchmark/backends/onnxruntime/backend.py
@@ -1,40 +1,40 @@
import gc
import os
-from logging import getLogger
from collections import OrderedDict
+from logging import getLogger
from tempfile import TemporaryDirectory
from typing import Any, Callable, Dict, List
-from ..base import Backend
-from .config import ORTConfig
-from ...task_utils import TEXT_GENERATION_TASKS
-from ...generators.dataset_generator import DatasetGenerator
-from .utils import format_calibration_config, format_quantization_config, TASKS_TO_ORTMODELS, TASKS_TO_ORTSD
-
import torch
from datasets import Dataset
from hydra.utils import get_class
from onnxruntime import SessionOptions
-from safetensors.torch import save_file
-from transformers import TrainerCallback
-from transformers.modeling_utils import no_init_weights
-from transformers.utils.logging import set_verbosity_error
-from optimum.onnxruntime.configuration import (
- AutoOptimizationConfig,
- AutoQuantizationConfig,
- AutoCalibrationConfig,
- OptimizationConfig,
- QuantizationConfig,
- CalibrationConfig,
-)
from optimum.onnxruntime import (
- ONNX_DECODER_WITH_PAST_NAME,
ONNX_DECODER_NAME,
- ORTTrainingArguments,
+ ONNX_DECODER_WITH_PAST_NAME,
ORTOptimizer,
ORTQuantizer,
ORTTrainer,
+ ORTTrainingArguments,
)
+from optimum.onnxruntime.configuration import (
+ AutoCalibrationConfig,
+ AutoOptimizationConfig,
+ AutoQuantizationConfig,
+ CalibrationConfig,
+ OptimizationConfig,
+ QuantizationConfig,
+)
+from safetensors.torch import save_file
+from transformers import TrainerCallback
+from transformers.modeling_utils import no_init_weights
+from transformers.utils.logging import set_verbosity_error
+
+from ...generators.dataset_generator import DatasetGenerator
+from ...task_utils import TEXT_GENERATION_TASKS
+from ..base import Backend
+from .config import ORTConfig
+from .utils import TASKS_TO_ORTMODELS, TASKS_TO_ORTSD, format_calibration_config, format_quantization_config
# disable transformers logging
set_verbosity_error()
@@ -199,8 +199,7 @@ def optimize_onnx_files(self) -> None:
)
elif self.config.optimization:
optimization_config = OptimizationConfig(
- optimize_for_gpu=(self.config.device == "cuda"),
- **self.config.optimization_config,
+ optimize_for_gpu=(self.config.device == "cuda"), **self.config.optimization_config
)
LOGGER.info("\t+ Creating optimizer")
optimizer = ORTOptimizer.from_pretrained(self.config.model, file_names=self.onnx_files_names)
@@ -243,15 +242,9 @@ def quantize_onnx_files(self) -> None:
if self.is_calibrated:
LOGGER.info("\t+ Generating calibration dataset")
- dataset_shapes = {
- "dataset_size": 1,
- "sequence_length": 1,
- **self.model_shapes,
- }
+ dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
calibration_dataset = DatasetGenerator(
- task=self.config.task,
- dataset_shapes=dataset_shapes,
- model_shapes=self.model_shapes,
+ task=self.config.task, dataset_shapes=dataset_shapes, model_shapes=self.model_shapes
)()
columns_to_be_removed = list(set(calibration_dataset.column_names) - set(self.inputs_names))
calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
@@ -260,10 +253,7 @@ def quantize_onnx_files(self) -> None:
if self.config.auto_calibration is not None:
LOGGER.info("\t+ Processing calibration config")
auto_calibration_method = getattr(AutoCalibrationConfig, self.config.auto_calibration)
- calibration_config = auto_calibration_method(
- calibration_dataset,
- **self.config.auto_calibration_config,
- )
+ calibration_config = auto_calibration_method(calibration_dataset, **self.config.auto_calibration_config)
elif self.config.calibration:
LOGGER.info("\t+ Processing calibration config")
calibration_config = format_calibration_config(self.config.calibration_config)
diff --git a/optimum_benchmark/backends/onnxruntime/config.py b/optimum_benchmark/backends/onnxruntime/config.py
index e0191b88..19ad747d 100644
--- a/optimum_benchmark/backends/onnxruntime/config.py
+++ b/optimum_benchmark/backends/onnxruntime/config.py
@@ -1,9 +1,9 @@
import os
-from typing import Any, Dict, Optional
from dataclasses import dataclass, field
+from typing import Any, Dict, Optional
-from ..config import BackendConfig
from ...import_utils import onnxruntime_version
+from ..config import BackendConfig
from ..peft_utils import PEFT_CONFIGS, PEFT_TASKS_TYPES
QUANTIZATION_CONFIG = {
@@ -18,14 +18,11 @@
}
AUTO_QUANTIZATION_CONFIG = {
- "is_static": False,
+ "is_static": False
# is_static is mandatory
}
-TRT_PROVIDER_OPTIONS = {
- "trt_engine_cache_enable": True,
- "trt_engine_cache_path": "/tmp/trt_cache",
-}
+TRT_PROVIDER_OPTIONS = {"trt_engine_cache_enable": True, "trt_engine_cache_path": "/tmp/trt_cache"}
IO_BINDING_LIBRARIES = ["transformers", "timm"]
IO_BINDING_PROVIDERS = ["CPUExecutionProvider", "CUDAExecutionProvider"]
@@ -103,10 +100,7 @@ def __post_init__(self):
os.makedirs(self.provider_options["trt_engine_cache_path"], exist_ok=True)
if self.quantization:
- self.quantization_config = {
- **QUANTIZATION_CONFIG,
- **self.quantization_config,
- }
+ self.quantization_config = {**QUANTIZATION_CONFIG, **self.quantization_config}
# raise ValueError if the quantization is static but calibration is not enabled
if self.quantization_config["is_static"] and self.auto_calibration is None and not self.calibration:
raise ValueError(
@@ -115,10 +109,7 @@ def __post_init__(self):
)
if self.auto_quantization is not None:
- self.auto_quantization_config = {
- **AUTO_QUANTIZATION_CONFIG,
- **self.auto_quantization_config,
- }
+ self.auto_quantization_config = {**AUTO_QUANTIZATION_CONFIG, **self.auto_quantization_config}
if self.auto_quantization_config["is_static"] and self.auto_calibration is None and not self.calibration:
raise ValueError(
"Quantization is static but calibration is not enabled. "
diff --git a/optimum_benchmark/backends/onnxruntime/utils.py b/optimum_benchmark/backends/onnxruntime/utils.py
index 759962f1..86eeeed9 100644
--- a/optimum_benchmark/backends/onnxruntime/utils.py
+++ b/optimum_benchmark/backends/onnxruntime/utils.py
@@ -1,13 +1,7 @@
from typing import Any, Dict
+from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantizationMode, QuantType
from optimum.pipelines import ORT_SUPPORTED_TASKS
-from onnxruntime.quantization import (
- CalibrationMethod,
- QuantizationMode,
- QuantFormat,
- QuantType,
-)
-
TASKS_TO_ORTSD = {
"stable-diffusion": "optimum.onnxruntime.ORTStableDiffusionPipeline",
diff --git a/optimum_benchmark/backends/openvino/backend.py b/optimum_benchmark/backends/openvino/backend.py
index 73cbd63d..e883c3ac 100644
--- a/optimum_benchmark/backends/openvino/backend.py
+++ b/optimum_benchmark/backends/openvino/backend.py
@@ -1,26 +1,26 @@
import gc
-import os
import inspect
-from typing import Any, Dict
-from logging import getLogger
+import os
from collections import OrderedDict
+from logging import getLogger
from tempfile import TemporaryDirectory
-
-from ..base import Backend
-from .config import OVConfig
-from .utils import TASKS_TO_OVMODEL
-from ...task_utils import TEXT_GENERATION_TASKS
-from ..transformers_utils import randomize_weights
-from ...generators.dataset_generator import DatasetGenerator
+from typing import Any, Dict
import torch
from hydra.utils import get_class
from openvino.runtime import properties
-from safetensors.torch import save_file
+from optimum.intel.openvino import OVConfig as OVQuantizationConfig # naming conflict
from optimum.intel.openvino import OVQuantizer
+from safetensors.torch import save_file
from transformers.modeling_utils import no_init_weights
from transformers.utils.logging import set_verbosity_error
-from optimum.intel.openvino import OVConfig as OVQuantizationConfig # naming conflict
+
+from ...generators.dataset_generator import DatasetGenerator
+from ...task_utils import TEXT_GENERATION_TASKS
+from ..base import Backend
+from ..transformers_utils import randomize_weights
+from .config import OVConfig
+from .utils import TASKS_TO_OVMODEL
# disable transformers logging
set_verbosity_error()
@@ -143,15 +143,9 @@ def quantize_automodel(self) -> None:
if self.config.calibration:
LOGGER.info("\t+ Generating calibration dataset")
- dataset_shapes = {
- "dataset_size": 1,
- "sequence_length": 1,
- **self.model_shapes,
- }
+ dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
calibration_dataset = DatasetGenerator(
- task=self.config.task,
- dataset_shapes=dataset_shapes,
- model_shapes=self.model_shapes,
+ task=self.config.task, dataset_shapes=dataset_shapes, model_shapes=self.model_shapes
)()
columns_to_be_removed = list(set(calibration_dataset.column_names) - set(quantizer._export_input_names))
calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
diff --git a/optimum_benchmark/backends/openvino/config.py b/optimum_benchmark/backends/openvino/config.py
index 6f4ba460..6b6797eb 100644
--- a/optimum_benchmark/backends/openvino/config.py
+++ b/optimum_benchmark/backends/openvino/config.py
@@ -1,8 +1,8 @@
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
-from ..config import BackendConfig
from ...import_utils import openvino_version
+from ..config import BackendConfig
@dataclass
diff --git a/optimum_benchmark/backends/openvino/utils.py b/optimum_benchmark/backends/openvino/utils.py
index 8a39824d..b1005f38 100644
--- a/optimum_benchmark/backends/openvino/utils.py
+++ b/optimum_benchmark/backends/openvino/utils.py
@@ -1,8 +1,4 @@
from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
TASKS_TO_OVMODEL = {task: f"optimum.intel.openvino.{ovmodel}" for task, ovmodel in _HEAD_TO_AUTOMODELS.items()}
-TASKS_TO_OVMODEL.update(
- {
- "feature-extraction": "optimum.intel.openvino.OVModelForFeatureExtraction",
- }
-)
+TASKS_TO_OVMODEL.update({"feature-extraction": "optimum.intel.openvino.OVModelForFeatureExtraction"})
diff --git a/optimum_benchmark/backends/peft_utils.py b/optimum_benchmark/backends/peft_utils.py
index 1a367120..8ec7d1fa 100644
--- a/optimum_benchmark/backends/peft_utils.py
+++ b/optimum_benchmark/backends/peft_utils.py
@@ -4,23 +4,16 @@
if is_peft_available():
from peft import (
+ AdaLoraConfig,
IA3Config,
LoraConfig,
PeftConfig,
- AdaLoraConfig,
PrefixTuningConfig,
PromptEncoderConfig,
PromptLearningConfig,
)
-PEFT_TASKS_TYPES = [
- "SEQ_CLS",
- "SEQ_2_SEQ_LM",
- "CAUSAL_LM",
- "TOKEN_CLS",
- "QUESTION_ANS",
- "FEATURE_EXTRACTION",
-]
+PEFT_TASKS_TYPES = ["SEQ_CLS", "SEQ_2_SEQ_LM", "CAUSAL_LM", "TOKEN_CLS", "QUESTION_ANS", "FEATURE_EXTRACTION"]
PEFT_CONFIG = {
"base_model_name_or_path": None,
diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py
index 268f4306..f7fdf7ab 100644
--- a/optimum_benchmark/backends/pytorch/backend.py
+++ b/optimum_benchmark/backends/pytorch/backend.py
@@ -1,29 +1,33 @@
import gc
import os
-from logging import getLogger
from collections import OrderedDict
+from logging import getLogger
from tempfile import TemporaryDirectory
from typing import Any, Callable, Dict, List
-from ..base import Backend
-from .config import PyTorchConfig
-from ..peft_utils import get_peft_config_class
-from ..transformers_utils import randomize_weights
-from ...import_utils import is_deepspeed_available, is_peft_available
-
+import datasets.utils.logging as datasets_logging
import torch
+import transformers.utils.logging as transformers_logging
from datasets import Dataset
from safetensors.torch import save_file
-import datasets.utils.logging as datasets_logging
+from transformers import Trainer, TrainerCallback, TrainerState, TrainingArguments
from transformers.modeling_utils import no_init_weights
-import transformers.utils.logging as transformers_logging
-from transformers import TrainerCallback, TrainerState, Trainer, TrainingArguments
+
+from ...import_utils import is_deepspeed_available, is_peft_available, is_torch_distributed_available
+from ..base import Backend
+from ..peft_utils import get_peft_config_class
+from ..transformers_utils import randomize_weights
+from .config import PyTorchConfig
if is_peft_available():
- from peft import get_peft_model
+ from peft import get_peft_model # type: ignore
+
+if is_torch_distributed_available():
+ import torch.distributed
if is_deepspeed_available():
- from deepspeed import init_inference
+ from deepspeed import init_inference # type: ignore
+
# disable other loggers
datasets_logging.set_verbosity_error()
@@ -94,14 +98,12 @@ def __init__(self, config: PyTorchConfig):
LOGGER.info("\t+ Using torch.compile on unet forward pass")
# TODO: should we compile vae and/or clip as well ?
self.pretrained_model.unet.forward = torch.compile(
- self.pretrained_model.unet.forward,
- **self.config.torch_compile_config,
+ self.pretrained_model.unet.forward, **self.config.torch_compile_config
)
else:
LOGGER.info("\t+ Using torch.compile on forward pass")
self.pretrained_model.forward = torch.compile(
- self.pretrained_model.forward,
- **self.config.torch_compile_config,
+ self.pretrained_model.forward, **self.config.torch_compile_config
)
if self.config.peft_strategy is not None:
@@ -176,9 +178,7 @@ def load_model_from_pretrained(self) -> None:
LOGGER.info(f"\t+ Loading model directly on device: {self.config.device}")
with torch.device(self.config.device):
self.pretrained_model = self.automodel_class.from_pretrained(
- pretrained_model_name_or_path=self.config.model,
- **self.config.hub_kwargs,
- **self.automodel_kwargs,
+ pretrained_model_name_or_path=self.config.model, **self.config.hub_kwargs, **self.automodel_kwargs
)
def create_no_weights_model(self) -> None:
@@ -233,30 +233,21 @@ def process_quantization_config(self) -> None:
from transformers import GPTQConfig
self.quantization_config = GPTQConfig(
- **dict(
- getattr(self.pretrained_config, "quantization_config", {}),
- **self.config.quantization_config,
- )
+ **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
)
elif self.is_awq_quantized:
LOGGER.info("\t+ Processing AWQ config")
from transformers import AwqConfig
self.quantization_config = AwqConfig(
- **dict(
- getattr(self.pretrained_config, "quantization_config", {}),
- **self.config.quantization_config,
- )
+ **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
)
elif self.is_bnb_quantized:
LOGGER.info("\t+ Processing BitsAndBytes config")
from transformers import BitsAndBytesConfig
self.quantization_config = BitsAndBytesConfig(
- **dict(
- getattr(self.pretrained_config, "quantization_config", {}),
- **self.config.quantization_config,
- )
+ **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config)
)
else:
self.quantization_config = None
@@ -290,8 +281,8 @@ def is_awq_quantized(self) -> bool:
def is_exllamav2(self) -> bool:
return (
self.is_gptq_quantized
- and "exllama_config" in self.quantization_config
- and self.quantization_config["exllama_config"].get("version", None) == 2
+ and hasattr(self.quantization_config, "exllama_config")
+ and self.quantization_config.exllama_config.get("version", None) == 2
)
@property
@@ -369,6 +360,10 @@ def seed(self):
torch.cuda.manual_seed_all(self.config.seed)
def clean(self) -> None:
+ if is_torch_distributed_available() and torch.distributed.is_initialized():
+ LOGGER.info("\t+ Waiting for distributed processes to finish before cleaning backend")
+ torch.distributed.barrier()
+
super().clean()
if hasattr(self, "tmpdir"):
diff --git a/optimum_benchmark/backends/pytorch/config.py b/optimum_benchmark/backends/pytorch/config.py
index d8089f60..7902719d 100644
--- a/optimum_benchmark/backends/pytorch/config.py
+++ b/optimum_benchmark/backends/pytorch/config.py
@@ -1,20 +1,16 @@
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
-from ..config import BackendConfig
-from ...env_utils import is_rocm_system
from ...import_utils import torch_version
+from ...system_utils import is_rocm_system
+from ..config import BackendConfig
from ..peft_utils import PEFT_CONFIGS, PEFT_TASKS_TYPES
DEVICE_MAPS = ["auto", "sequential"]
AMP_DTYPES = ["bfloat16", "float16"]
TORCH_DTYPES = ["bfloat16", "float16", "float32", "auto"]
-QUANTIZATION_CONFIGS = {
- "bnb": {"llm_int8_threshold": 0.0},
- "gptq": {},
- "awq": {},
-}
+QUANTIZATION_CONFIGS = {"bnb": {"llm_int8_threshold": 0.0}, "gptq": {}, "awq": {}}
COMPILE_CONFIG = {
"fullgraph": False,
"dynamic": False,
@@ -89,10 +85,7 @@ def __post_init__(self):
if self.quantization_config:
QUANTIZATION_CONFIG = QUANTIZATION_CONFIGS[self.quantization_scheme]
- self.quantization_config = {
- **QUANTIZATION_CONFIG,
- **self.quantization_config,
- }
+ self.quantization_config = {**QUANTIZATION_CONFIG, **self.quantization_config}
if self.peft_strategy is not None:
if self.peft_strategy not in PEFT_CONFIGS:
diff --git a/optimum_benchmark/backends/tensorrt_llm/backend.py b/optimum_benchmark/backends/tensorrt_llm/backend.py
index 7c86adeb..3beb1387 100644
--- a/optimum_benchmark/backends/tensorrt_llm/backend.py
+++ b/optimum_benchmark/backends/tensorrt_llm/backend.py
@@ -1,13 +1,13 @@
from logging import getLogger
from typing import Any, Dict
+from hydra.utils import get_class
+from transformers.utils import ModelOutput
+
from ..base import Backend
from .config import TRTLLMConfig
from .utils import MODEL_TYPE_TO_TRTLLMMODEL
-from hydra.utils import get_class
-from transformers.utils import ModelOutput
-
LOGGER = getLogger("tensorrt-llm")
@@ -47,9 +47,7 @@ def load_trtmodel_from_pretrained(self) -> None:
def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
return self.pretrained_model.generate(
- input_ids=inputs.get("input_ids", None),
- attention_mask=inputs.get("attention_mask", None),
- max_new_tokens=1,
+ input_ids=inputs.get("input_ids", None), attention_mask=inputs.get("attention_mask", None), max_new_tokens=1
)
def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
diff --git a/optimum_benchmark/backends/tensorrt_llm/config.py b/optimum_benchmark/backends/tensorrt_llm/config.py
index e676accb..d7f4b1cb 100644
--- a/optimum_benchmark/backends/tensorrt_llm/config.py
+++ b/optimum_benchmark/backends/tensorrt_llm/config.py
@@ -1,9 +1,8 @@
-from typing import Optional
from dataclasses import dataclass
+from typing import Optional
-from ..config import BackendConfig
from ...import_utils import tesnorrt_llm_version
-
+from ..config import BackendConfig
SUPPORTED_DTYPES = ["float16", "bfloat16", "float32"]
diff --git a/optimum_benchmark/backends/text_generation_inference/backend.py b/optimum_benchmark/backends/text_generation_inference/backend.py
index 538de53c..c7ecd5ce 100644
--- a/optimum_benchmark/backends/text_generation_inference/backend.py
+++ b/optimum_benchmark/backends/text_generation_inference/backend.py
@@ -1,23 +1,24 @@
import gc
import os
import time
+from concurrent.futures import ThreadPoolExecutor
from logging import getLogger
-from typing import Any, Dict, List
from tempfile import TemporaryDirectory
-from concurrent.futures import ThreadPoolExecutor
-
-from ..base import Backend
-from .config import TGIConfig
-from ...task_utils import TEXT_GENERATION_TASKS
-from ..transformers_utils import randomize_weights
+from typing import Any, Dict, List
import torch
-import docker
-import docker.types
-import docker.errors
-from safetensors.torch import save_model
from huggingface_hub import InferenceClient, snapshot_download
from huggingface_hub.inference._text_generation import TextGenerationResponse
+from safetensors.torch import save_model
+
+import docker
+import docker.errors
+import docker.types
+
+from ...task_utils import TEXT_GENERATION_TASKS
+from ..base import Backend
+from ..transformers_utils import randomize_weights
+from .config import TGIConfig
# backend logger
LOGGER = getLogger("text-generation-inference")
@@ -59,12 +60,7 @@ def load_model_from_pretrained(self) -> None:
model_cache_path = f"{self.config.volume}/{model_cache_folder}"
snapshot_ref = (
- open(
- f"{model_cache_path}/refs/{self.config.hub_kwargs.get('revision', 'main')}",
- "r",
- )
- .read()
- .strip()
+ open(f"{model_cache_path}/refs/{self.config.hub_kwargs.get('revision', 'main')}", "r").read().strip()
)
model_snapshot_path = f"{model_cache_path}/snapshots/{snapshot_ref}"
@@ -133,12 +129,7 @@ def start_tgi_server(self) -> None:
env["HUGGING_FACE_HUB_TOKEN"] = os.environ["HUGGING_FACE_HUB_TOKEN"]
LOGGER.info("\t+ Building TGI command")
- self.command = [
- "--model-id",
- self.config.model,
- "--revision",
- self.config.hub_kwargs.get("revision", "main"),
- ]
+ self.command = ["--model-id", self.config.model, "--revision", self.config.hub_kwargs.get("revision", "main")]
if self.config.sharded is not None:
self.command.extend(["--sharded", str(self.config.sharded).lower()])
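
For reference, the flattened command construction above builds a flag/value list that is later handed to the TGI container; a toy reproduction with hypothetical config values:

```python
# Toy reproduction of the command-building pattern above (values are hypothetical).
model = "gpt2"
revision = "main"
sharded = False  # tri-state in the real config: None means "don't pass the flag"

command = ["--model-id", model, "--revision", revision]
if sharded is not None:
    # booleans are lowercased, mirroring str(...).lower() in the hunk above
    command.extend(["--sharded", str(sharded).lower()])

print(command)  # ['--model-id', 'gpt2', '--revision', 'main', '--sharded', 'false']
```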
diff --git a/optimum_benchmark/backends/timm_utils.py b/optimum_benchmark/backends/timm_utils.py
index 9e2924b2..07105003 100644
--- a/optimum_benchmark/backends/timm_utils.py
+++ b/optimum_benchmark/backends/timm_utils.py
@@ -1,6 +1,6 @@
from typing import Any, Dict, Optional
-from ..import_utils import is_timm_available, is_transformers_available, is_torch_available
+from ..import_utils import is_timm_available, is_torch_available, is_transformers_available
if is_torch_available():
import torch
diff --git a/optimum_benchmark/backends/torch_ort/backend.py b/optimum_benchmark/backends/torch_ort/backend.py
index a7515d2f..52bede74 100644
--- a/optimum_benchmark/backends/torch_ort/backend.py
+++ b/optimum_benchmark/backends/torch_ort/backend.py
@@ -4,18 +4,22 @@
from tempfile import TemporaryDirectory
from typing import Any, Callable, Dict, List
-from ..transformers_utils import randomize_weights
-from ..peft_utils import get_peft_config_class
-from .config import TorchORTConfig
-from ..base import Backend
-
import torch
from datasets import Dataset
+from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments
from safetensors.torch import save_file
from transformers import TrainerCallback, TrainerState
from transformers.modeling_utils import no_init_weights
from transformers.utils.logging import set_verbosity_error
-from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments
+
+from ...import_utils import is_peft_available
+from ..base import Backend
+from ..peft_utils import get_peft_config_class
+from ..transformers_utils import randomize_weights
+from .config import TorchORTConfig
+
+if is_peft_available():
+ from peft import get_peft_model # type: ignore
# disable transformers logging
set_verbosity_error()
@@ -39,9 +43,7 @@ def __init__(self, config: TorchORTConfig):
self.load_automodel_from_pretrained()
if self.config.peft_strategy is not None:
- LOGGER.info("\t+ Applying PEFT")
- from peft import get_peft_model
-
+ LOGGER.info("\t+ Using PEFT")
peft_config_class = get_peft_config_class(self.config.peft_strategy)
peft_config = peft_config_class(**self.config.peft_config)
self.pretrained_model = get_peft_model(self.pretrained_model, peft_config=peft_config)
@@ -87,9 +89,7 @@ def load_automodel_with_no_weights(self) -> None:
def load_automodel_from_pretrained(self) -> None:
self.pretrained_model = self.automodel_class.from_pretrained(
- self.config.model,
- **self.automodel_kwargs,
- **self.config.hub_kwargs,
+ self.config.model, **self.automodel_kwargs, **self.config.hub_kwargs
).to(self.config.device)
@property
diff --git a/optimum_benchmark/backends/torch_ort/config.py b/optimum_benchmark/backends/torch_ort/config.py
index ac2de2f7..8559022f 100644
--- a/optimum_benchmark/backends/torch_ort/config.py
+++ b/optimum_benchmark/backends/torch_ort/config.py
@@ -1,8 +1,8 @@
from dataclasses import dataclass, field
from typing import Any, Dict, Optional
-from ..config import BackendConfig
from ...import_utils import torch_ort_version
+from ..config import BackendConfig
from ..peft_utils import PEFT_CONFIGS, PEFT_TASKS_TYPES
diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py
index 1d7ad410..6835617a 100644
--- a/optimum_benchmark/backends/transformers_utils.py
+++ b/optimum_benchmark/backends/transformers_utils.py
@@ -1,29 +1,24 @@
import os
from typing import Any, Dict, Optional, Union
-from ..import_utils import is_transformers_available, is_torch_available
+from ..import_utils import is_torch_available, is_transformers_available
if is_torch_available():
import torch
if is_transformers_available():
from transformers import (
+ AutoConfig,
+ AutoProcessor,
FeatureExtractionMixin,
- ImageProcessingMixin,
- PreTrainedTokenizer,
GenerationConfig,
+ ImageProcessingMixin,
PretrainedConfig,
+ PreTrainedTokenizer,
ProcessorMixin,
- AutoProcessor,
- AutoConfig,
)
- PretrainedProcessor = Union[
- FeatureExtractionMixin,
- ImageProcessingMixin,
- PreTrainedTokenizer,
- ProcessorMixin,
- ]
+ PretrainedProcessor = Union[FeatureExtractionMixin, ImageProcessingMixin, PreTrainedTokenizer, ProcessorMixin]
def get_transformers_cache_dir() -> str:
@@ -52,8 +47,7 @@ def get_transformers_pre_processor(model: str, **kwargs) -> Optional["Pretrained
def extract_transformers_shapes_from_artifacts(
- config: "PretrainedConfig",
- processor: Optional["PretrainedProcessor"] = None,
+ config: "PretrainedConfig", processor: Optional["PretrainedProcessor"] = None
) -> Dict[str, Any]:
artifacts_dict = {}
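
A hedged usage sketch of the helper whose signature is reflowed above; `bert-base-uncased` is only an illustrative checkpoint, and the exact keys of the returned dict depend on the config and optional processor:

```python
# Hypothetical usage of the shape-extraction helper shown above.
from transformers import AutoConfig

from optimum_benchmark.backends.transformers_utils import extract_transformers_shapes_from_artifacts

config = AutoConfig.from_pretrained("bert-base-uncased")  # illustrative checkpoint
shapes = extract_transformers_shapes_from_artifacts(config)  # processor is optional
print(sorted(shapes))  # model shape hints consumed by the input/dataset generators
```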
diff --git a/optimum_benchmark/benchmarks/base.py b/optimum_benchmark/benchmarks/base.py
index 84495a1a..a8c42806 100644
--- a/optimum_benchmark/benchmarks/base.py
+++ b/optimum_benchmark/benchmarks/base.py
@@ -3,8 +3,8 @@
from typing import ClassVar, Generic
from ..backends.base import Backend
-from .report import BenchmarkReport
from .config import BenchmarkConfigT
+from .report import BenchmarkReport
LOGGER = getLogger("benchmark")
diff --git a/optimum_benchmark/benchmarks/config.py b/optimum_benchmark/benchmarks/config.py
index f3e96348..76d102af 100644
--- a/optimum_benchmark/benchmarks/config.py
+++ b/optimum_benchmark/benchmarks/config.py
@@ -1,8 +1,7 @@
from abc import ABC
-from typing import TypeVar
-from logging import getLogger
from dataclasses import dataclass
-
+from logging import getLogger
+from typing import TypeVar
LOGGER = getLogger("benchmark")
diff --git a/optimum_benchmark/benchmarks/inference/benchmark.py b/optimum_benchmark/benchmarks/inference/benchmark.py
index 9cc96ee1..07c4f9ee 100644
--- a/optimum_benchmark/benchmarks/inference/benchmark.py
+++ b/optimum_benchmark/benchmarks/inference/benchmark.py
@@ -1,26 +1,23 @@
+from dataclasses import dataclass
from logging import getLogger
-from typing import List, Tuple, Dict
-from ..base import Benchmark
-from .config import InferenceConfig
-from ...trackers.energy import EnergyTracker
-from ...trackers.memory import MemoryTracker
-from ...trackers.latency import LatencyTracker
from ...backends.base import Backend, BackendConfigT
from ...generators.input_generator import InputGenerator
from ...import_utils import is_torch_distributed_available
-from ...task_utils import TEXT_GENERATION_TASKS, IMAGE_DIFFUSION_TASKS
-from .report import InferenceReport, TextGenerationReport, ImageDiffusionReport
+from ...task_utils import IMAGE_DIFFUSION_TASKS, TEXT_GENERATION_TASKS
+from ...trackers.energy import Efficiency, EnergyTracker
+from ...trackers.latency import LatencyTracker, Throughput
+from ...trackers.memory import MemoryTracker
+from ..base import Benchmark
+from ..report import BenchmarkMeasurements, BenchmarkReport
+from .config import InferenceConfig
if is_torch_distributed_available():
import torch.distributed
LOGGER = getLogger("inference")
-IMAGE_DIFFUSION_KWARGS = {
- "num_inference_steps": 30,
- "num_images_per_prompt": 1,
-}
+IMAGE_DIFFUSION_KWARGS = {"num_inference_steps": 30, "num_images_per_prompt": 1}
TEXT_GENERATION_KWARGS = {
"num_return_sequences": 1,
@@ -33,6 +30,33 @@
"num_beams": 1,
}
+EFFICIENCY_UNIT = "samples/kWh"
+THROUGHPUT_UNIT = "samples/s"
+
+PREFILL_THROUGHPUT_UNIT = "tokens/s"
+DECODE_THROUGHPUT_UNIT = "tokens/s"
+CALL_THROUGHPUT_UNIT = "images/s"
+
+PREFILL_EFFICIENCY_UNIT = "tokens/kWh"
+DECODE_EFFICIENCY_UNIT = "tokens/kWh"
+CALL_EFFICIENCY_UNIT = "images/kWh"
+
+
+@dataclass
+class InferenceReport(BenchmarkReport):
+ forward: BenchmarkMeasurements
+
+
+@dataclass
+class ImageDiffusionReport(BenchmarkReport):
+ call: BenchmarkMeasurements
+
+
+@dataclass
+class TextGenerationReport(BenchmarkReport):
+ prefill: BenchmarkMeasurements
+ decode: BenchmarkMeasurements
+
class InferenceBenchmark(Benchmark[InferenceConfig]):
NAME = "inference"
@@ -42,17 +66,18 @@ def __init__(self, config: InferenceConfig) -> None:
def run(self, backend: Backend[BackendConfigT]) -> None:
if is_torch_distributed_available() and torch.distributed.is_initialized():
+ LOGGER.info("\t+ Distributing batch size across processes")
if self.config.input_shapes["batch_size"] % torch.distributed.get_world_size() != 0:
raise ValueError(
"The batch size must be divisible by the number of processes in a distributed environment"
)
self.config.input_shapes["batch_size"] //= torch.distributed.get_world_size()
+ if backend.config.device == "cuda" and backend.config.task in TEXT_GENERATION_TASKS:
+ TEXT_GENERATION_TASKS["synced_gpus"] = True
LOGGER.info("\t+ Creating input generator")
self.input_generator = InputGenerator(
- task=backend.config.task,
- model_shapes=backend.model_shapes,
- input_shapes=self.config.input_shapes,
+ task=backend.config.task, model_shapes=backend.model_shapes, input_shapes=self.config.input_shapes
)
if backend.config.task in TEXT_GENERATION_TASKS:
@@ -64,12 +89,7 @@ def run(self, backend: Backend[BackendConfigT]) -> None:
LOGGER.info("\t+ Updating Text Generation kwargs with default values")
self.config.generate_kwargs = {**TEXT_GENERATION_KWARGS, **self.config.generate_kwargs}
LOGGER.info("\t+ Initializing Text Generation report")
- self.report = TextGenerationReport(
- batch_size=self.config.input_shapes["batch_size"],
- sequence_length=self.config.input_shapes["sequence_length"],
- num_new_tokens=self.config.generate_kwargs["max_new_tokens"],
- num_return_sequences=self.config.generate_kwargs["num_return_sequences"],
- )
+ self.report = TextGenerationReport(prefill=BenchmarkMeasurements(), decode=BenchmarkMeasurements())
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
LOGGER.info("\t+ Generating and preparing Image Diffusion input")
@@ -78,19 +98,14 @@ def run(self, backend: Backend[BackendConfigT]) -> None:
LOGGER.info("\t+ Updating Image Diffusion kwargs with default values")
self.config.forward_kwargs = {**IMAGE_DIFFUSION_KWARGS, **self.config.forward_kwargs}
LOGGER.info("\t+ Initializing Image Diffusion report")
- self.report = ImageDiffusionReport(
- batch_size=self.config.input_shapes["batch_size"],
- num_images_per_prompts=self.config.forward_kwargs["num_images_per_prompt"],
- )
+ self.report = ImageDiffusionReport(call=BenchmarkMeasurements())
else:
LOGGER.info("\t+ Generating and preparing Inference input")
self.forward_inputs = self.input_generator(mode="forward")
self.forward_inputs = backend.prepare_inputs(self.forward_inputs)
LOGGER.info("\t+ Initializing Inference report")
- self.report = InferenceReport(
- batch_size=self.config.input_shapes["batch_size"],
- )
+ self.report = InferenceReport(forward=BenchmarkMeasurements())
LOGGER.info("\t+ Preparing backend for Inference")
backend.prepare_for_inference(
@@ -103,11 +118,9 @@ def run(self, backend: Backend[BackendConfigT]) -> None:
LOGGER.info("\t+ Warming up backend for Inference")
for _ in range(self.config.warmup_runs):
if backend.config.task in TEXT_GENERATION_TASKS:
- generate_warmup_kwargs = {"max_new_tokens": 2, "min_new_tokens": 2}
- _ = backend.generate(self.generate_input, generate_warmup_kwargs)
+ _ = backend.generate(self.generate_input, {"max_new_tokens": 2, "min_new_tokens": 2})
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
- diffuse_warmup_kwargs = {"num_inference_steps": 2}
- _ = backend.call(self.diffuse_input, diffuse_warmup_kwargs)
+ _ = backend.call(self.diffuse_input, {"num_inference_steps": 2})
else:
_ = backend.forward(self.forward_inputs, self.config.forward_kwargs)
@@ -117,14 +130,11 @@ def run(self, backend: Backend[BackendConfigT]) -> None:
backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids
)
if backend.config.task in TEXT_GENERATION_TASKS:
- forward_memories_dict, generate_memories_dict = self.run_text_generation_memory_tracking(backend)
- self.report.populate_memory(forward_memories_dict, generate_memories_dict)
+ self.run_text_generation_memory_tracking(backend)
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
- call_memories_dict = self.run_image_diffusion_memory_tracking(backend)
- self.report.populate_memory(call_memories_dict)
+ self.run_image_diffusion_memory_tracking(backend)
else:
- forward_memories_dict = self.run_inference_memory_tracking(backend)
- self.report.populate_memory(forward_memories_dict)
+ self.run_inference_memory_tracking(backend)
self.report.log_memory()
@@ -132,146 +142,170 @@ def run(self, backend: Backend[BackendConfigT]) -> None:
LOGGER.info("\t+ Creating inference latency tracker")
self.latency_tracker = LatencyTracker(backend=backend.config.name, device=backend.config.device)
if backend.config.task in TEXT_GENERATION_TASKS:
- forward_latencies_dict, generate_latencies_dict = self.run_text_generation_latency_tracking(backend)
- self.report.populate_latency(forward_latencies_dict, generate_latencies_dict)
+ self.run_text_generation_latency_tracking(backend)
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
- call_latencies_dict = self.run_image_diffusion_latency_tracking(backend)
- self.report.populate_latency(call_latencies_dict)
+ self.run_image_diffusion_latency_tracking(backend)
else:
- forward_latencies_dict = self.run_latency_inference_tracking(backend)
- self.report.populate_latency(forward_latencies_dict)
+ self.run_latency_inference_tracking(backend)
self.report.log_latency()
+ self.report.log_throughput()
if self.config.energy:
LOGGER.info("\t+ Creating inference energy tracker")
self.energy_tracker = EnergyTracker(device=backend.config.device, device_ids=backend.config.device_ids)
if backend.config.task in TEXT_GENERATION_TASKS:
- forward_energies_dict, generate_energies_dict = self.run_text_generation_energy_tracking(backend)
- self.report.populate_energy(forward_energies_dict, generate_energies_dict)
+ self.run_text_generation_energy_tracking(backend)
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
- call_energies_dict = self.run_image_diffusion_energy_tracking(backend)
- self.report.populate_energy(call_energies_dict)
+ self.run_image_diffusion_energy_tracking(backend)
else:
- forward_energies_dict = self.run_inference_energy_tracking(backend)
- self.report.populate_energy(forward_energies_dict)
+ self.run_inference_energy_tracking(backend)
self.report.log_energy()
+ self.report.log_efficiency()
+
+ self.report.log()
## Memory tracking
- def run_text_generation_memory_tracking(self, backend: Backend) -> Tuple[Dict[str, float], Dict[str, float]]:
+ def run_text_generation_memory_tracking(self, backend: Backend):
LOGGER.info("\t+ Running memory tracking")
self.memory_tracker.reset()
with self.memory_tracker.track():
_ = backend.forward(self.forward_inputs, self.config.forward_kwargs)
- forward_memories_dict = self.memory_tracker.get_memories_dict()
+ self.report.prefill.memory = self.memory_tracker.get_max_memory()
self.memory_tracker.reset()
with self.memory_tracker.track():
_ = backend.generate(self.generate_input, self.config.generate_kwargs)
- generate_memories_dict = self.memory_tracker.get_memories_dict()
+ self.report.decode.memory = self.memory_tracker.get_max_memory()
- return forward_memories_dict, generate_memories_dict
-
- def run_image_diffusion_memory_tracking(self, backend: Backend) -> Dict[str, float]:
+ def run_image_diffusion_memory_tracking(self, backend: Backend):
LOGGER.info("\t+ Running memory tracking")
self.memory_tracker.reset()
with self.memory_tracker.track():
_ = backend.call(self.diffuse_input, self.config.forward_kwargs)
- call_memories_dict = self.memory_tracker.get_memories_dict()
-
- return call_memories_dict
+ self.report.call.memory = self.memory_tracker.get_max_memory()
- def run_inference_memory_tracking(self, backend: Backend) -> Dict[str, float]:
+ def run_inference_memory_tracking(self, backend: Backend):
LOGGER.info("\t+ Running memory tracking")
self.memory_tracker.reset()
with self.memory_tracker.track():
_ = backend.forward(self.forward_inputs, self.config.forward_kwargs)
- forward_memories_dict = self.memory_tracker.get_memories_dict()
-
- return forward_memories_dict
+ self.report.forward.memory = self.memory_tracker.get_max_memory()
## Latency tracking
- def run_text_generation_latency_tracking(self, backend: Backend) -> Tuple[List[float], List[float]]:
+ def run_text_generation_latency_tracking(self, backend: Backend):
LOGGER.info("\t+ Running latency tracking")
self.latency_tracker.reset()
- while self.latency_tracker.get_total_latency() < self.config.duration:
+ while self.latency_tracker.get_elapsed_time() < self.config.duration:
with self.latency_tracker.track():
_ = backend.forward(self.forward_inputs, self.config.forward_kwargs)
- forward_latencies_list = self.latency_tracker.get_latencies_list()
+ self.report.prefill.latency = self.latency_tracker.get_latency()
+ self.report.prefill.throughput = self.latency_tracker.get_throughput(
+ volume=self.prefill_volume, unit=PREFILL_THROUGHPUT_UNIT
+ )
self.latency_tracker.reset()
- while self.latency_tracker.get_total_latency() < self.config.duration:
+ while self.latency_tracker.get_elapsed_time() < self.config.duration:
with self.latency_tracker.track():
_ = backend.generate(self.generate_input, self.config.generate_kwargs)
- generate_latencies_list = self.latency_tracker.get_latencies_list()
-
- return forward_latencies_list, generate_latencies_list
+ self.report.decode.latency = self.latency_tracker.get_latency() - self.report.prefill.latency.mean
+ self.report.decode.throughput = Throughput.from_latency(
+ self.report.decode.latency, self.decode_volume, unit=DECODE_THROUGHPUT_UNIT
+ )
- def run_image_diffusion_latency_tracking(self, backend: Backend) -> List[float]:
+ def run_image_diffusion_latency_tracking(self, backend: Backend):
LOGGER.info("\t+ Running latency tracking")
self.latency_tracker.reset()
- while self.latency_tracker.get_total_latency() < self.config.duration:
+ while self.latency_tracker.get_elapsed_time() < self.config.duration:
with self.latency_tracker.track():
_ = backend.call(self.diffuse_input, self.config.forward_kwargs)
- call_latencies_list = self.latency_tracker.get_latencies_list()
-
- return call_latencies_list
+ self.report.call.latency = self.latency_tracker.get_latency()
+ self.report.call.throughput = Throughput.from_latency(
+ self.report.call.latency, self.call_volume, unit=CALL_THROUGHPUT_UNIT
+ )
- def run_latency_inference_tracking(self, backend: Backend) -> List[float]:
+ def run_latency_inference_tracking(self, backend: Backend):
LOGGER.info("\t+ Running latency tracking")
self.latency_tracker.reset()
- while self.latency_tracker.get_total_latency() < self.config.duration:
+ while self.latency_tracker.get_elapsed_time() < self.config.duration:
with self.latency_tracker.track():
_ = backend.forward(self.forward_inputs, self.config.forward_kwargs)
- forward_latencies_list = self.latency_tracker.get_latencies_list()
-
- return forward_latencies_list
+ self.report.forward.latency = self.latency_tracker.get_latency()
+ self.report.forward.throughput = Throughput.from_latency(
+ self.report.forward.latency, self.forward_volume, unit=THROUGHPUT_UNIT
+ )
## Energy tracking
- def run_text_generation_energy_tracking(self, backend: Backend) -> Tuple[Dict[str, float], Dict[str, float]]:
+ def run_text_generation_energy_tracking(self, backend: Backend):
LOGGER.info("\t+ Running energy tracking")
self.energy_tracker.reset()
with self.energy_tracker.track():
_ = backend.forward(self.forward_inputs, self.config.forward_kwargs)
- forward_energies_dict = self.energy_tracker.get_energies_dict()
+ self.report.prefill.energy = self.energy_tracker.get_energy()
+ self.report.prefill.efficiency = Efficiency.from_energy(
+ self.report.prefill.energy, self.prefill_volume, unit=PREFILL_EFFICIENCY_UNIT
+ )
self.energy_tracker.reset()
with self.energy_tracker.track():
_ = backend.generate(self.generate_input, self.config.generate_kwargs)
- generate_energies_dict = self.energy_tracker.get_energies_dict()
-
- return forward_energies_dict, generate_energies_dict
+ self.report.decode.energy = self.energy_tracker.get_energy() - self.report.prefill.energy
+ self.report.decode.efficiency = Efficiency.from_energy(
+ self.report.decode.energy, self.decode_volume, unit=DECODE_EFFICIENCY_UNIT
+ )
- def run_image_diffusion_energy_tracking(self, backend: Backend) -> Dict[str, float]:
+ def run_image_diffusion_energy_tracking(self, backend: Backend):
LOGGER.info("\t+ Running energy tracking")
self.energy_tracker.reset()
with self.energy_tracker.track():
_ = backend.call(self.diffuse_input, self.config.forward_kwargs)
- call_energies_dict = self.energy_tracker.get_energies_dict()
-
- return call_energies_dict
+ self.report.call.energy = self.energy_tracker.get_energy()
+ self.report.call.efficiency = Efficiency.from_energy(
+ self.report.call.energy, self.call_volume, unit=CALL_EFFICIENCY_UNIT
+ )
- def run_inference_energy_tracking(self, backend: Backend) -> Dict[str, float]:
+ def run_inference_energy_tracking(self, backend: Backend):
LOGGER.info("\t+ Running energy tracking")
self.energy_tracker.reset()
with self.energy_tracker.track():
_ = backend.forward(self.forward_inputs, self.config.forward_kwargs)
- forward_energies_dict = self.energy_tracker.get_energies_dict()
+ self.report.forward.energy = self.energy_tracker.get_energy()
+ self.report.forward.efficiency = Efficiency.from_energy(
+ self.report.forward.energy, self.forward_volume, unit=EFFICIENCY_UNIT
+ )
+
+ @property
+ def forward_volume(self) -> int: # in samples
+ return self.config.input_shapes["batch_size"]
+
+ @property
+ def prefill_volume(self) -> int: # in tokens
+ return self.config.input_shapes["batch_size"] * self.config.input_shapes["sequence_length"]
- return forward_energies_dict
+ @property
+ def call_volume(self) -> int: # in images
+ return self.config.input_shapes["batch_size"] * self.config.forward_kwargs["num_images_per_prompt"]
+
+ @property
+ def decode_volume(self) -> int: # in tokens
+ return (
+ self.config.input_shapes["batch_size"]
+ * self.config.generate_kwargs["num_return_sequences"]
+ * self.config.generate_kwargs["max_new_tokens"]
+ )
def get_report(self) -> InferenceReport:
return self.report
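
The four `*_volume` properties above reduce to simple arithmetic over the input shapes and generation kwargs; these volumes are what `Throughput.from_latency` and `Efficiency.from_energy` normalize by. A worked example with hypothetical values:

```python
# Worked example of the volume computations above, with hypothetical shapes/kwargs.
input_shapes = {"batch_size": 2, "sequence_length": 16}
generate_kwargs = {"max_new_tokens": 100, "num_return_sequences": 1}
forward_kwargs = {"num_images_per_prompt": 1}

forward_volume = input_shapes["batch_size"]                                          # 2 samples
prefill_volume = input_shapes["batch_size"] * input_shapes["sequence_length"]        # 32 tokens
decode_volume = (
    input_shapes["batch_size"]
    * generate_kwargs["num_return_sequences"]
    * generate_kwargs["max_new_tokens"]
)                                                                                     # 200 tokens
call_volume = input_shapes["batch_size"] * forward_kwargs["num_images_per_prompt"]   # 2 images
```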
diff --git a/optimum_benchmark/benchmarks/inference/callback.py b/optimum_benchmark/benchmarks/inference/callback.py
deleted file mode 100644
index 4871691d..00000000
--- a/optimum_benchmark/benchmarks/inference/callback.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import time
-
-from ...import_utils import is_torch_available
-
-from transformers import LogitsProcessor
-
-if is_torch_available():
- import torch
-
-
-# TODO: uses this class for more fine-grained latency measurements in text generation
-class MeasurementProcessor(LogitsProcessor):
- def __init__(self, device: str, backend: str):
- self.device = device
- self.backend = backend
-
- self.latencies = []
-
- def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
- """
- Callback to track the time it takes to generate one batch of tokens.
- """
- self.latencies.append(time.perf_counter_ns())
-
- return scores
diff --git a/optimum_benchmark/benchmarks/inference/config.py b/optimum_benchmark/benchmarks/inference/config.py
index d5c4a0bb..7b6cfd3f 100644
--- a/optimum_benchmark/benchmarks/inference/config.py
+++ b/optimum_benchmark/benchmarks/inference/config.py
@@ -1,17 +1,13 @@
+from dataclasses import dataclass, field
from logging import getLogger
from typing import Any, Dict, Optional
-from dataclasses import dataclass, field
-from ...env_utils import is_rocm_system
+from ...system_utils import is_rocm_system
from ..config import BenchmarkConfig
LOGGER = getLogger("inference")
-INPUT_SHAPES = {
- "batch_size": 2,
- "sequence_length": 16,
- "num_choices": 2,
-}
+INPUT_SHAPES = {"batch_size": 2, "num_choices": 2, "sequence_length": 16}
@dataclass
@@ -40,16 +36,13 @@ class InferenceConfig(BenchmarkConfig):
# methods kwargs
forward_kwargs: Dict[str, Any] = field(
- default_factory=dict,
- metadata={"help": "Keyword arguments to pass to the forward method of the model."},
+ default_factory=dict, metadata={"help": "Keyword arguments to pass to the forward method of the model."}
)
generate_kwargs: Dict[str, Any] = field(
- default_factory=dict,
- metadata={"help": "Keyword arguments to pass to the generate method of the model."},
+ default_factory=dict, metadata={"help": "Keyword arguments to pass to the generate method of the model."}
)
call_kwargs: Dict[str, Any] = field(
- default_factory=dict,
- metadata={"help": "Keyword arguments to pass to the __call__ method of the pipeline."},
+ default_factory=dict, metadata={"help": "Keyword arguments to pass to the __call__ method of the pipeline."}
)
def __post_init__(self):
diff --git a/optimum_benchmark/benchmarks/inference/report.py b/optimum_benchmark/benchmarks/inference/report.py
deleted file mode 100644
index 9cd43cfc..00000000
--- a/optimum_benchmark/benchmarks/inference/report.py
+++ /dev/null
@@ -1,353 +0,0 @@
-from dataclasses import dataclass, field
-from statistics import mean, stdev
-from typing import Any, Dict, List
-from logging import getLogger
-
-from ..report import BenchmarkReport
-
-LOGGER = getLogger("report")
-
-
-@dataclass
-class InferenceReport(BenchmarkReport):
- # Config
- batch_size: int
- # Metrics
- forward: Dict[str, Any] = field(default_factory=dict)
-
- # POPULATING
- def populate_latency(self, forward_latencies_list: List[float]):
- ## Latency
- self.forward["latency"] = {
- "list[s]": forward_latencies_list,
- "mean(s)": compute_mean(forward_latencies_list),
- "stdev(s)": compute_stdev(forward_latencies_list),
- }
- ## Throughput
- forward_throughputs_list = [self.batch_size / latency for latency in forward_latencies_list]
- self.forward["throughput"] = {
- "list[samples/s]": forward_throughputs_list,
- "mean(samples/s)": compute_mean(forward_throughputs_list),
- "stdev(samples/s)": compute_stdev(forward_throughputs_list),
- }
-
- def populate_memory(self, forward_memories_dict: Dict[str, Any]):
- self.forward["memory"] = forward_memories_dict
-
- def populate_energy(self, forward_energies_dict: Dict[str, Any]):
- self.forward["energy"] = forward_energies_dict
-
- # LOGGING
- def log_latency(self):
- for key, value in self.forward["latency"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ forward.latency.{key}: {value:f} (s)")
- for key, value in self.forward["throughput"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ forward.throughput.{key}: {value:f} (samples/s)")
-
- def log_memory(self):
- for key, value in self.forward["memory"].items():
- LOGGER.info(f"\t+ forward.memory.{key}: {value:f} (MB)")
-
- def log_energy(self):
- for key, value in self.forward["energy"].items():
- LOGGER.info(f"\t+ forward.energy.{key}: {value:f} (kWh)")
-
- def log_all(self) -> None:
- if "latency" in self.forward:
- self.log_latency()
- if "memory" in self.forward:
- self.log_memory()
- if "energy" in self.forward:
- self.log_energy()
-
- # add operator to aggregate multiple reports
- def __add__(self, other: "InferenceReport") -> "InferenceReport":
- agg_report = InferenceReport(batch_size=self.batch_size + other.batch_size)
- if "latency" in self.forward and "latency" in other.forward:
- agg_forward_latencies_list = [
- (lat_1 + lat_2) / 2
- for lat_1, lat_2 in zip(self.forward["latency"]["list[s]"], other.forward["latency"]["list[s]"])
- ]
- agg_report.populate_latency(agg_forward_latencies_list)
-
- if "memory" in self.forward and "memory" in other.forward:
- agg_forward_memories_dict = {}
- for key in self.forward["memory"]:
- if "vram" in key:
- # our vram measures are not process-specific
- agg_forward_memories_dict[key] = max(self.forward["memory"][key], other.forward["memory"][key])
- else:
- # ram and pytorch measures are process-specific
- agg_forward_memories_dict[key] = self.forward["memory"][key] + other.forward["memory"][key]
-
- agg_report.populate_memory(agg_forward_memories_dict)
-
- if "energy" in self.forward and "energy" in other.forward:
- agg_forward_energies_dict = {}
- for key in self.forward["energy"]:
- # theoretically, the energies measured by codecarbon are process-specific (it's not clear from the code)
- agg_forward_energies_dict[key] = self.forward["energy"][key] + other.forward["energy"][key]
-
- agg_report.populate_energy(agg_forward_energies_dict)
-
- return agg_report
-
-
-@dataclass
-class ImageDiffusionReport(BenchmarkReport):
- # Config
- batch_size: int
- num_images_per_prompts: int
- # Metrics
- call: Dict[str, Any] = field(default_factory=dict)
-
- # POPULATING
- def populate_latency(self, call_latencies_list: List[float]):
- ## Latency
- self.call["latency"] = {
- "list[s]": call_latencies_list,
- "mean(s)": compute_mean(call_latencies_list),
- "stdev(s)": compute_stdev(call_latencies_list),
- }
- ## Throughput
- call_throughputs_list = [
- self.batch_size * self.num_images_per_prompts / latency for latency in call_latencies_list
- ]
- self.call["throughput"] = {
- "list[images/s]": call_throughputs_list,
- "mean[images/s]": compute_mean(call_throughputs_list),
- "stdev[images/s]": compute_stdev(call_throughputs_list),
- }
-
- def populate_memory(self, call_memories_dict: Dict[str, Any]):
- self.call["memory"] = call_memories_dict
-
- def populate_energy(self, call_energies_dict: Dict[str, Any]):
- self.call["energy"] = call_energies_dict
-
- # LOGGING
- def log_latency(self):
- for key, value in self.call["latency"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ call.latency.{key}: {value:f} (s)")
- for key, value in self.call["throughput"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ call.throughput.{key}: {value:f} (images/s)")
-
- def log_memory(self):
- for key, value in self.call["memory"].items():
- LOGGER.info(f"\t+ call.memory.{key}: {value:f} (MB)")
-
- def log_energy(self):
- for key, value in self.call["energy"].items():
- LOGGER.info(f"\t+ call.energy.{key}: {value:f} (kWh)")
-
- def log_all(self) -> None:
- if "latency" in self.call:
- self.log_latency()
- if "memory" in self.call:
- self.log_memory()
- if "energy" in self.call:
- self.log_energy()
-
- # add operator to aggregate multiple reports
- def __add__(self, other: "ImageDiffusionReport") -> "ImageDiffusionReport":
- assert self.num_images_per_prompts == other.num_images_per_prompts, "num_images_per_prompts must be the same"
-
- agg_report = ImageDiffusionReport(
- batch_size=self.batch_size + other.batch_size,
- num_images_per_prompts=self.num_images_per_prompts,
- )
- if "latency" in self.call and "latency" in other.call:
- agg_call_latencies_list = [
- (lat_1 + lat_2) / 2
- for lat_1, lat_2 in zip(self.call["latency"]["list[s]"], other.call["latency"]["list[s]"])
- ]
- agg_report.populate_latency(agg_call_latencies_list)
-
- if "memory" in self.call and "memory" in other.call:
- agg_call_memories_dict = {}
- for key in self.call["memory"]:
- if "vram" in key:
- # our vram measures are not process-specific
- agg_call_memories_dict[key] = max(self.call["memory"][key], other.call["memory"][key])
- else:
- # ram and pytorch measures are process-specific
- agg_call_memories_dict[key] = self.call["memory"][key] + other.call["memory"][key]
-
- agg_report.populate_memory(agg_call_memories_dict)
-
- if "energy" in self.call and "energy" in other.call:
- agg_call_energies_dict = {}
- for key in self.call["energy"]:
- # theoretically, the energies measured by codecarbon are process-specific (it's not clear from the code)
- agg_call_energies_dict[key] = self.call["energy"][key] + other.call["energy"][key]
-
- agg_report.populate_energy(agg_call_energies_dict)
-
- return agg_report
-
-
-@dataclass
-class TextGenerationReport(BenchmarkReport):
- # Config
- batch_size: int
- sequence_length: int
- num_new_tokens: int
- num_return_sequences: int
- # Prefill Metrics
- prefill: Dict[str, Any] = field(default_factory=dict)
- # Decode Metrics
- decode: Dict[str, Any] = field(default_factory=dict)
-
- def populate_latency(self, forward_latencies_list: List[float], generate_latencies_list: List[float]):
- ## Latency
- self.prefill["latency"] = {
- "list[s]": forward_latencies_list,
- "mean(s)": compute_mean(forward_latencies_list),
- "stdev(s)": compute_stdev(forward_latencies_list),
- }
- ## Throughput
- prefill_throughputs_list = [
- self.batch_size * self.sequence_length / latency for latency in forward_latencies_list
- ]
- self.prefill["throughput"] = {
- "list[tokens/s]": prefill_throughputs_list,
- "mean[tokens/s]": compute_mean(prefill_throughputs_list),
- "stdev[tokens/s]": compute_stdev(prefill_throughputs_list),
- }
- ## Latency
- decode_latencies_list = [
- generate_latency - self.prefill["latency"]["mean(s)"] for generate_latency in generate_latencies_list
- ]
- self.decode["latency"] = {
- "list[s]": decode_latencies_list,
- "mean(s)": compute_mean(decode_latencies_list),
- "stdev(s)": compute_stdev(decode_latencies_list),
- }
- ## Throughput
- decode_throughputs_list = [
- self.batch_size * self.num_new_tokens * self.num_return_sequences / latency
- for latency in decode_latencies_list
- ]
- self.decode["throughput"] = {
- "list[tokens/s]": decode_throughputs_list,
- "mean[tokens/s]": compute_mean(decode_throughputs_list),
- "stdev[tokens/s]": compute_stdev(decode_throughputs_list),
- }
-
- def populate_memory(self, forward_memories_dict: Dict[str, Any], generate_memories_dict: Dict[str, Any]):
- self.prefill["memory"] = forward_memories_dict
- self.decode["memory"] = generate_memories_dict
-
- def populate_energy(self, forward_energies_dict: Dict[str, Any], generate_energies_dict: Dict[str, Any]):
- self.prefill["energy"] = forward_energies_dict
- self.decode["energy"] = generate_energies_dict
-
- # LOGGING
- def log_latency(self):
- for key, value in self.prefill["latency"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ prefill.latency.{key}: {value:f} (s)")
- for key, value in self.prefill["throughput"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ prefill.throughput.{key}: {value:f} (tokens/s)")
- for key, value in self.decode["latency"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ decode.latency.{key}: {value:f} (s)")
- for key, value in self.decode["throughput"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ decode.throughput.{key}: {value:f} (tokens/s)")
-
- def log_memory(self):
- for key, value in self.prefill["memory"].items():
- LOGGER.info(f"\t+ prefill.memory.{key}: {value:f} (MB)")
- for key, value in self.decode["memory"].items():
- LOGGER.info(f"\t+ decode.memory.{key}: {value:f} (MB)")
-
- def log_energy(self):
- for key, value in self.prefill["energy"].items():
- LOGGER.info(f"\t+ prefill.energy.{key}: {value:f} (kWh)")
- for key, value in self.decode["energy"].items():
- LOGGER.info(f"\t+ decode.energy.{key}: {value:f} (kWh)")
-
- def log_all(self) -> None:
- if "latency" in self.prefill:
- self.log_latency()
- if "memory" in self.prefill:
- self.log_memory()
- if "energy" in self.prefill:
- self.log_energy()
-
- # add operator to aggregate multiple reports
- def __add__(self, other: "TextGenerationReport") -> "TextGenerationReport":
- agg_report = TextGenerationReport(
- batch_size=self.batch_size + other.batch_size,
- sequence_length=self.sequence_length,
- num_new_tokens=self.num_new_tokens,
- num_return_sequences=self.num_return_sequences,
- )
- if "latency" in self.prefill and "latency" in other.prefill:
- agg_forward_latencies_list = [
- (lat_1 + lat_2) / 2
- for lat_1, lat_2 in zip(self.prefill["latency"]["list[s]"], other.prefill["latency"]["list[s]"])
- ]
- agg_generate_latencies_list = [
- (lat_1 + lat_2) / 2
- for lat_1, lat_2 in zip(self.decode["latency"]["list[s]"], other.decode["latency"]["list[s]"])
- ]
- agg_report.populate_latency(agg_forward_latencies_list, agg_generate_latencies_list)
-
- if "memory" in self.prefill and "memory" in other.prefill:
- agg_forward_memories_dict = {}
- for key in self.prefill["memory"]:
- if "vram" in key:
- # our vram measures are not process-specific
- agg_forward_memories_dict[key] = max(self.prefill["memory"][key], other.prefill["memory"][key])
- else:
- # ram and pytorch measures are process-specific
- agg_forward_memories_dict[key] = self.prefill["memory"][key] + other.prefill["memory"][key]
-
- agg_generate_memories_dict = {}
- for key in self.decode["memory"]:
- if "vram" in key:
- # our vram measures are not process-specific
- agg_generate_memories_dict[key] = max(self.decode["memory"][key], other.decode["memory"][key])
- else:
- # ram and pytorch measures are process-specific
- agg_generate_memories_dict[key] = self.decode["memory"][key] + other.decode["memory"][key]
-
- agg_report.populate_memory(agg_forward_memories_dict, agg_generate_memories_dict)
-
- if "energy" in self.prefill and "energy" in other.prefill:
- agg_forward_energies_dict = {}
- for key in self.prefill["energy"]:
- # theoretically, the energies measured by codecarbon are process-specific (it's not clear from the code)
- agg_forward_energies_dict[key] = self.prefill["energy"][key] + other.prefill["energy"][key]
-
- agg_generate_energies_dict = {}
- for key in self.decode["energy"]:
- # theoretically, the energies measured by codecarbon are process-specific (it's not clear from the code)
- agg_generate_energies_dict[key] = self.decode["energy"][key] + other.decode["energy"][key]
-
- agg_report.populate_energy(agg_forward_energies_dict, agg_generate_energies_dict)
-
- return agg_report
-
-
-def compute_mean(values: List[float]) -> float:
- return mean(values) if len(values) > 0 else 0.0
-
-
-def compute_stdev(values: List[float]) -> float:
- return stdev(values) if len(values) > 1 else 0.0
diff --git a/optimum_benchmark/benchmarks/report.py b/optimum_benchmark/benchmarks/report.py
index 69491d65..02dbc541 100644
--- a/optimum_benchmark/benchmarks/report.py
+++ b/optimum_benchmark/benchmarks/report.py
@@ -1,11 +1,61 @@
-from dataclasses import dataclass, asdict
-from typing import Union, Optional
-from json import dump
import os
+from dataclasses import asdict, dataclass
+from json import dump
+from logging import getLogger
+from typing import Any, Dict, List, Optional, Union
-from transformers.configuration_utils import PushToHubMixin
-from flatten_dict import flatten
import pandas as pd
+from flatten_dict import flatten
+from transformers.configuration_utils import PushToHubMixin
+
+from ..trackers.energy import Efficiency, Energy
+from ..trackers.latency import Latency, Throughput
+from ..trackers.memory import Memory
+
+LOGGER = getLogger("report")
+
+REPORT_FILE_NAME = "benchmark_report.json"
+
+
+@dataclass
+class BenchmarkMeasurements:
+ memory: Optional[Memory] = None
+ latency: Optional[Latency] = None
+ throughput: Optional[Throughput] = None
+ energy: Optional[Energy] = None
+ efficiency: Optional[Efficiency] = None
+
+ @staticmethod
+ def aggregate(benchmark_measurements: List["BenchmarkMeasurements"]) -> "BenchmarkMeasurements":
+ memory = (
+ Memory.aggregate([m.memory for m in benchmark_measurements])
+ if benchmark_measurements[0].memory is not None
+ else None
+ )
+ latency = (
+ Latency.aggregate([m.latency for m in benchmark_measurements])
+ if benchmark_measurements[0].latency is not None
+ else None
+ )
+ throughput = (
+ Throughput.aggregate([m.throughput for m in benchmark_measurements if m.throughput is not None])
+ if benchmark_measurements[0].throughput is not None
+ else None
+ )
+ energy = (
+ Energy.aggregate([m.energy for m in benchmark_measurements if m.energy is not None])
+ if benchmark_measurements[0].energy is not None
+ else None
+ )
+ efficiency = (
+ Efficiency.aggregate([m.efficiency for m in benchmark_measurements if m.efficiency is not None])
+ if benchmark_measurements[0].efficiency is not None
+ else None
+ )
+
+ return BenchmarkMeasurements(
+ memory=memory, latency=latency, throughput=throughput, energy=energy, efficiency=efficiency
+ )
@dataclass
@@ -22,7 +72,7 @@ def save_pretrained(
if use_auth_token is not None:
kwargs["token"] = use_auth_token
- config_file_name = config_file_name if config_file_name is not None else "benchmark_report.json"
+ config_file_name = config_file_name if config_file_name is not None else REPORT_FILE_NAME
if os.path.isfile(save_directory):
raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
@@ -36,21 +86,17 @@ def save_pretrained(
files_timestamps = self._get_files_timestamps(save_directory)
output_config_file = os.path.join(save_directory, config_file_name)
- self.to_json(output_config_file)
+ self.to_json(output_config_file, flat=False)
if push_to_hub:
self._upload_modified_files(
- save_directory,
- repo_id,
- files_timestamps,
- commit_message=commit_message,
- token=kwargs.get("token"),
+ save_directory, repo_id, files_timestamps, commit_message=commit_message, token=kwargs.get("token")
)
- def to_dict(self) -> dict:
+ def to_dict(self) -> Dict[str, Any]:
return asdict(self)
- def to_flat_dict(self) -> dict:
+ def to_flat_dict(self) -> Dict[str, Any]:
report_dict = self.to_dict()
return flatten(report_dict, reducer="dot")
@@ -64,10 +110,60 @@ def to_json(self, path: str, flat: bool = False) -> None:
def to_dataframe(self) -> pd.DataFrame:
flat_report_dict = self.to_flat_dict()
- return pd.DataFrame(flat_report_dict, index=[0])
+ return pd.DataFrame.from_dict(flat_report_dict, orient="index")
def to_csv(self, path: str) -> None:
self.to_dataframe().to_csv(path, index=False)
- def log_all(self) -> None:
- raise NotImplementedError("`log_all` method must be implemented in the child class")
+ def log_memory(self):
+ for target in self.to_dict().keys():
+ benchmark_measurements: BenchmarkMeasurements = getattr(self, target)
+ if benchmark_measurements.memory is not None:
+ benchmark_measurements.memory.log(prefix=target)
+
+ def log_latency(self):
+ for target in self.to_dict().keys():
+ benchmark_measurements: BenchmarkMeasurements = getattr(self, target)
+ if benchmark_measurements.latency is not None:
+ benchmark_measurements.latency.log(prefix=target)
+
+ def log_throughput(self):
+ for target in self.to_dict().keys():
+ benchmark_measurements: BenchmarkMeasurements = getattr(self, target)
+ if benchmark_measurements.throughput is not None:
+ benchmark_measurements.throughput.log(prefix=target)
+
+ def log_energy(self):
+ for target in self.to_dict().keys():
+ benchmark_measurements: BenchmarkMeasurements = getattr(self, target)
+ if benchmark_measurements.energy is not None:
+ benchmark_measurements.energy.log(prefix=target)
+
+ def log_efficiency(self):
+ for target in self.to_dict().keys():
+ benchmark_measurements: BenchmarkMeasurements = getattr(self, target)
+ if benchmark_measurements.efficiency is not None:
+ benchmark_measurements.efficiency.log(prefix=target)
+
+ def log(self):
+ for target in self.to_dict().keys():
+ benchmark_measurements: BenchmarkMeasurements = getattr(self, target)
+ if benchmark_measurements.memory is not None:
+ benchmark_measurements.memory.log(prefix=target)
+ if benchmark_measurements.latency is not None:
+ benchmark_measurements.latency.log(prefix=target)
+ if benchmark_measurements.throughput is not None:
+ benchmark_measurements.throughput.log(prefix=target)
+ if benchmark_measurements.energy is not None:
+ benchmark_measurements.energy.log(prefix=target)
+ if benchmark_measurements.efficiency is not None:
+ benchmark_measurements.efficiency.log(prefix=target)
+
+ @classmethod
+ def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport":
+ aggregated_measurements = {}
+ for target in reports[0].to_dict().keys():
+ benchmark_measurements = [getattr(report, target) for report in reports]
+ aggregated_measurements[target] = BenchmarkMeasurements.aggregate(benchmark_measurements)
+
+ return cls(**aggregated_measurements)
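A minimal sketch (not part of the patch) of how the new report API is meant to compose: a report subclass declares its targets as BenchmarkMeasurements fields (as TrainingReport does further down), and per-worker reports are merged target by target through `aggregate`. The ExampleReport name and its field names are illustrative only.

from dataclasses import dataclass, field
from typing import List

from optimum_benchmark.benchmarks.report import BenchmarkMeasurements, BenchmarkReport

@dataclass
class ExampleReport(BenchmarkReport):  # hypothetical subclass, for illustration only
    prefill: BenchmarkMeasurements = field(default_factory=BenchmarkMeasurements)
    decode: BenchmarkMeasurements = field(default_factory=BenchmarkMeasurements)

def merge_worker_reports(reports: List[ExampleReport]) -> ExampleReport:
    # aggregation is delegated per target to BenchmarkMeasurements.aggregate
    merged = ExampleReport.aggregate(reports)
    merged.log()  # logs memory/latency/throughput/energy/efficiency for each target
    merged.to_json("benchmark_report.json", flat=False)
    return merged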
diff --git a/optimum_benchmark/benchmarks/training/benchmark.py b/optimum_benchmark/benchmarks/training/benchmark.py
index 90c231d0..950cb0f7 100644
--- a/optimum_benchmark/benchmarks/training/benchmark.py
+++ b/optimum_benchmark/benchmarks/training/benchmark.py
@@ -1,19 +1,30 @@
-from logging import getLogger
from contextlib import ExitStack
+from dataclasses import dataclass
+from logging import getLogger
+
+from transformers import default_data_collator
-from ..base import Benchmark
-from .config import TrainingConfig
-from .report import TrainingReport
-from ...trackers.memory import MemoryTracker
-from ...trackers.energy import EnergyTracker
-from .callback import LatencyTrainerCallback
from ...backends.base import Backend, BackendConfigT
from ...generators.dataset_generator import DatasetGenerator
-
-from transformers import default_data_collator
+from ...trackers.energy import Efficiency, EnergyTracker
+from ...trackers.latency import LatencyTrainerCallback, Throughput
+from ...trackers.memory import MemoryTracker
+from ..base import Benchmark
+from ..report import BenchmarkMeasurements, BenchmarkReport
+from .config import TrainingConfig
LOGGER = getLogger("training")
+TRAIN_THROUGHPUT_UNIT = "samples/s"
+TRAIN_EFFICIENCY_UNIT = "samples/kWh"
+
+
+@dataclass
+class TrainingReport(BenchmarkReport):
+ overall: BenchmarkMeasurements = BenchmarkMeasurements()
+ warmup: BenchmarkMeasurements = BenchmarkMeasurements()
+ train: BenchmarkMeasurements = BenchmarkMeasurements()
+
class TrainingBenchmark(Benchmark[TrainingConfig]):
NAME = "training"
@@ -24,21 +35,14 @@ def __init__(self, config: TrainingConfig) -> None:
def run(self, backend: Backend[BackendConfigT]) -> None:
LOGGER.info("\t+ Creating dataset generator")
dataset_generator = DatasetGenerator(
- task=backend.config.task,
- model_shapes=backend.model_shapes,
- dataset_shapes=self.config.dataset_shapes,
+ task=backend.config.task, model_shapes=backend.model_shapes, dataset_shapes=self.config.dataset_shapes
)
LOGGER.info("\t+ Generating training dataset")
training_dataset = dataset_generator()
LOGGER.info("\t+ Initializing training report")
- self.report = TrainingReport(
- max_steps=self.config.max_steps,
- warmup_steps=self.config.warmup_steps,
- per_process_batch_size=self.config.training_arguments["per_device_train_batch_size"],
- gradient_accumulation_steps=self.config.training_arguments["gradient_accumulation_steps"],
- )
+ self.report = TrainingReport()
training_callbackes = []
if self.config.latency:
@@ -70,17 +74,51 @@ def run(self, backend: Backend[BackendConfigT]) -> None:
training_arguments=self.config.training_arguments,
)
- if self.config.latency:
- self.report.populate_latency(overall_latencies_list=latency_callback.get_latencies_list())
- self.report.log_latency()
-
if self.config.memory:
- self.report.populate_memory(overall_memories_dict=memory_tracker.get_memories_dict())
- self.report.log_memory()
+ self.report.overall.memory = memory_tracker.get_max_memory()
+ self.report.warmup.memory = memory_tracker.get_max_memory()
+ self.report.train.memory = memory_tracker.get_max_memory()
+
+ if self.config.latency:
+ self.report.overall.latency = latency_callback.get_latency()
+ self.report.overall.throughput = Throughput.from_latency(
+ self.report.overall.latency, volume=self.overall_volume, unit=TRAIN_THROUGHPUT_UNIT
+ )
+ self.report.warmup.latency = self.report.overall.latency[: self.config.warmup_steps]
+ self.report.warmup.throughput = Throughput.from_latency(
+ self.report.warmup.latency, volume=self.warmup_volume, unit=TRAIN_THROUGHPUT_UNIT
+ )
+ self.report.train.latency = self.report.overall.latency[self.config.warmup_steps :]
+ self.report.train.throughput = Throughput.from_latency(
+ self.report.train.latency, volume=self.train_volume, unit=TRAIN_THROUGHPUT_UNIT
+ )
if self.config.energy:
- self.report.populate_energy(overall_energies_dict=energy_tracker.get_energies_dict())
- self.report.log_energy()
+ # can only get overall energy consumption
+ self.report.overall.energy = energy_tracker.get_energy()
+ self.report.overall.efficiency = Efficiency.from_energy(
+ self.report.overall.energy, volume=self.overall_volume, unit=TRAIN_EFFICIENCY_UNIT
+ )
+
+ @property
+ def overall_volume(self) -> int:
+ return (
+ self.config.max_steps
+ * self.config.training_arguments["per_device_train_batch_size"]
+ * self.config.training_arguments["gradient_accumulation_steps"]
+ )
+
+ @property
+ def warmup_volume(self) -> int:
+ return (
+ self.config.warmup_steps
+ * self.config.training_arguments["per_device_train_batch_size"]
+ * self.config.training_arguments["gradient_accumulation_steps"]
+ )
+
+ @property
+ def train_volume(self) -> int:
+ return self.overall_volume - self.warmup_volume
def get_report(self) -> TrainingReport:
return self.report
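A quick, self-contained check of the volume arithmetic behind the three properties above, with illustrative numbers that are not taken from the patch; each optimizer step consumes per_device_train_batch_size * gradient_accumulation_steps samples per process.

# illustrative values only
max_steps, warmup_steps = 140, 40
per_device_train_batch_size, gradient_accumulation_steps = 8, 4

samples_per_step = per_device_train_batch_size * gradient_accumulation_steps  # 32
overall_volume = max_steps * samples_per_step    # 140 * 32 = 4480 samples
warmup_volume = warmup_steps * samples_per_step  # 40 * 32 = 1280 samples
train_volume = overall_volume - warmup_volume    # 4480 - 1280 = 3200 samples
# Throughput.from_latency / Efficiency.from_energy then relate these volumes to the
# measured latency/energy of the corresponding slice (samples/s and samples/kWh).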
diff --git a/optimum_benchmark/benchmarks/training/callback.py b/optimum_benchmark/benchmarks/training/callback.py
deleted file mode 100644
index 88026d79..00000000
--- a/optimum_benchmark/benchmarks/training/callback.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import time
-from typing import List
-
-import torch
-from transformers import TrainerCallback
-
-
-class LatencyTrainerCallback(TrainerCallback):
- def __init__(self, device: str, backend: str) -> None:
- self.device = device
- self.backend = backend
- self.all_latencies_list = []
-
- def on_step_begin(self, *args, **kwargs):
- # one record per step
- if self.device == "cuda" and self.backend == "pytorch":
- self.all_latencies_list.append(torch.cuda.Event(enable_timing=True))
- self.all_latencies_list[-1].record()
- else:
- self.all_latencies_list.append(time.perf_counter_ns())
-
- def on_train_end(self, *args, **kwargs):
- # one last record to measure the time of the last step
- if self.device == "cuda" and self.backend == "pytorch":
- self.all_latencies_list.append(torch.cuda.Event(enable_timing=True))
- self.all_latencies_list[-1].record()
- else:
- self.all_latencies_list.append(time.perf_counter_ns())
-
- def get_latencies_list(self) -> List[float]:
- if self.device == "cuda" and self.backend == "pytorch":
- torch.cuda.synchronize() # synchronize the device to make sure all events have been recorded
- latencies_list = [
- self.all_latencies_list[i - 1].elapsed_time(self.all_latencies_list[i]) * 1e-3
- for i in range(1, len(self.all_latencies_list))
- ]
- else:
- latencies_list = [
- (self.all_latencies_list[i] - self.all_latencies_list[i - 1]) * 1e-9
- for i in range(1, len(self.all_latencies_list))
- ]
-
- return latencies_list
diff --git a/optimum_benchmark/benchmarks/training/config.py b/optimum_benchmark/benchmarks/training/config.py
index e5d19581..6ea9d0b4 100644
--- a/optimum_benchmark/benchmarks/training/config.py
+++ b/optimum_benchmark/benchmarks/training/config.py
@@ -25,11 +25,7 @@
"ddp_find_unused_parameters": False,
}
-DATASET_SHAPES = {
- "dataset_size": 500,
- "sequence_length": 16,
- "num_choices": 1,
-}
+DATASET_SHAPES = {"dataset_size": 500, "sequence_length": 16, "num_choices": 1}
@dataclass
@@ -63,7 +59,8 @@ def __post_init__(self):
if self.max_steps != self.training_arguments["max_steps"]:
LOGGER.warning(
f"`benchmark.max_steps` ({self.max_steps}) and `benchmark.training_arguments.max_steps` "
- f"({self.training_arguments['max_steps']}) are different. Using `benchmark.training_arguments.max_steps`."
+ f"({self.training_arguments['max_steps']}) are different. "
+ "Using `benchmark.training_arguments.max_steps`."
)
self.max_steps = self.training_arguments["max_steps"]
diff --git a/optimum_benchmark/benchmarks/training/report.py b/optimum_benchmark/benchmarks/training/report.py
deleted file mode 100644
index 9eeba211..00000000
--- a/optimum_benchmark/benchmarks/training/report.py
+++ /dev/null
@@ -1,169 +0,0 @@
-from dataclasses import dataclass, field
-from statistics import mean, stdev
-from typing import Any, Dict, List
-from logging import getLogger
-
-from ..report import BenchmarkReport
-
-LOGGER = getLogger("report")
-
-
-@dataclass
-class TrainingReport(BenchmarkReport):
- max_steps: int
- warmup_steps: int
- per_process_batch_size: int
- gradient_accumulation_steps: int
-
- overall: Dict[str, Any] = field(default_factory=dict)
- training: Dict[str, Any] = field(default_factory=dict)
- warmup: Dict[str, Any] = field(default_factory=dict)
-
- world_size: int = 1
-
- # POPULATING
- def populate_latency(self, overall_latencies_list: List[float]) -> None:
- assert (
- len(overall_latencies_list) == self.max_steps
- ), f"Expected {self.max_steps} latencies, but got {len(overall_latencies_list)} latencies"
- # Overall
- ## Latency
- self.overall["latency"] = {
- "list[s/step]": overall_latencies_list,
- "mean(s/step)": compute_mean(overall_latencies_list),
- "stdev(s/step)": compute_stdev(overall_latencies_list),
- }
- ## Throughput
- overall_throughputs_list = [
- self.world_size * self.per_process_batch_size * self.gradient_accumulation_steps / latency
- for latency in overall_latencies_list
- ]
- self.overall["throughput"] = {
- "list[samples/s]": overall_throughputs_list,
- "mean(samples/s)": compute_mean(overall_throughputs_list),
- "stdev(samples/s)": compute_stdev(overall_throughputs_list),
- }
- # Training
- ## Latency
- training_latencies_list = overall_latencies_list[self.warmup_steps :]
- self.training["latency"] = {
- "list[s/step]": training_latencies_list,
- "mean(s/step)": compute_mean(training_latencies_list),
- "stdev(s/step)": compute_stdev(training_latencies_list),
- }
- ## Throughput
- training_throughputs_list = overall_throughputs_list[self.warmup_steps :]
- self.training["throughput"] = {
- "list[samples/s]": training_throughputs_list,
- "mean(samples/s)": compute_mean(training_throughputs_list),
- "stdev(samples/s)": compute_stdev(training_throughputs_list),
- }
- # Warmup
- ## Latency
- warmup_latencies_list = overall_latencies_list[: self.warmup_steps]
- self.warmup["latency"] = {
- "list[s/step]": warmup_latencies_list,
- "mean(s/step)": compute_mean(warmup_latencies_list),
- "stdev(s/step)": compute_stdev(warmup_latencies_list),
- }
- ## Throughput
- warmup_throughputs_list = overall_throughputs_list[: self.warmup_steps]
- self.warmup["throughput"] = {
- "list[samples/s]": warmup_throughputs_list,
- "mean(samples/s)": compute_mean(warmup_throughputs_list),
- "stdev(samples/s)": compute_stdev(warmup_throughputs_list),
- }
-
- def populate_memory(self, overall_memories_dict: Dict[str, float]) -> None:
- self.warmup["memory"] = overall_memories_dict
- self.overall["memory"] = overall_memories_dict
- self.training["memory"] = overall_memories_dict
-
- def populate_energy(self, overall_energies_dict: Dict[str, float]) -> None:
- self.overall["energy"] = overall_energies_dict
- # can't get training only or warmup only energies
- # self.warmup["energy"] = overall_energies_dict
- # self.training["energy"] = overall_energies_dict
- # TODO: use a callback for energy instead of a tracker
-
- # LOGGING
- def log_latency(self):
- for key, value in self.training["latency"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ training.latency.{key}: {value:f} (s)")
- for key, value in self.training["throughput"].items():
- if "list" in key:
- continue
- LOGGER.info(f"\t+ training.throughput.{key}: {value:f} (samples/s)")
-
- def log_memory(self):
- for key, value in self.training["memory"].items():
- LOGGER.info(f"\t+ training.memory.{key}: {value:f} (MB)")
-
- def log_energy(self):
- for key, value in self.overall["energy"].items():
- LOGGER.info(f"\t+ overall.energy.{key}: {value:f} (kWh)")
-
- def log_all(self):
- if "latency" in self.training:
- self.log_latency()
- if "memory" in self.training:
- self.log_memory()
- if "energy" in self.training:
- self.log_energy()
-
- # LOGIC
- def __add__(self, other: "TrainingReport") -> "TrainingReport":
- assert self.max_steps == other.max_steps, "Both reports must have the same max_steps"
- assert self.warmup_steps == other.warmup_steps, "Both reports must have the same warmup_steps"
- assert (
- self.gradient_accumulation_steps == other.gradient_accumulation_steps
- ), "Both reports must have the same gradient_accumulation_steps"
-
- agg_report = TrainingReport(
- max_steps=self.max_steps,
- warmup_steps=self.warmup_steps,
- world_size=self.world_size + other.world_size,
- per_process_batch_size=self.per_process_batch_size,
- gradient_accumulation_steps=self.gradient_accumulation_steps,
- )
-
- if "latency" in self.overall:
- agg_overall_latencies_list = [
- max(lat_1, lat_2)
- for lat_1, lat_2 in zip(
- self.overall["latency"]["list[s/step]"], other.overall["latency"]["list[s/step]"]
- )
- ]
- agg_report.populate_latency(agg_overall_latencies_list)
-
- if "memory" in self.overall:
- agg_overall_memories_dict = {}
- for key in self.overall["memory"]:
- if "vram" in key:
- # our vram measures are not process-specific
- agg_overall_memories_dict[key] = max(self.overall["memory"][key], other.overall["memory"][key])
- else:
- # ram and pytorch measures are process-specific (can be accumulated)
- agg_overall_memories_dict[key] = self.overall["memory"][key] + other.overall["memory"][key]
-
- agg_report.populate_memory(agg_overall_memories_dict)
-
- if "energy" in self.overall:
- agg_overall_energies_dict = {}
- for key in self.overall["energy"]:
- # theoretically, the energies measured by codecarbon are process-specific (it's not clear from the code)
- agg_overall_energies_dict[key] = self.overall["energy"][key] + other.overall["energy"][key]
-
- agg_report.populate_energy(agg_overall_energies_dict)
-
- return agg_report
-
-
-def compute_mean(values: List[float]) -> float:
- return mean(values) if len(values) > 0 else 0.0
-
-
-def compute_stdev(values: List[float]) -> float:
- return stdev(values) if len(values) > 1 else 0.0
diff --git a/optimum_benchmark/benchmarks/utils.py b/optimum_benchmark/benchmarks/utils.py
deleted file mode 100644
index 8b137891..00000000
--- a/optimum_benchmark/benchmarks/utils.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 4961c189..f91a3b2c 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -1,28 +1,25 @@
-import os
import glob
+import os
from logging import getLogger
import hydra
-from omegaconf import DictConfig, OmegaConf
from hydra.core.config_store import ConfigStore
+from omegaconf import DictConfig, OmegaConf
-from .launchers.inline.config import InlineConfig
-from .launchers.process.config import ProcessConfig
-from .launchers.torchrun.config import TorchrunConfig
-
+from .backends.neural_compressor.config import INCConfig
+from .backends.onnxruntime.config import ORTConfig
from .backends.openvino.config import OVConfig
from .backends.pytorch.config import PyTorchConfig
-from .backends.onnxruntime.config import ORTConfig
-from .backends.torch_ort.config import TorchORTConfig
from .backends.tensorrt_llm.config import TRTLLMConfig
-from .backends.neural_compressor.config import INCConfig
from .backends.text_generation_inference.config import TGIConfig
-
+from .backends.torch_ort.config import TorchORTConfig
+from .benchmarks.inference.config import InferenceConfig
from .benchmarks.report import BenchmarkReport
-from .experiment import launch, ExperimentConfig
from .benchmarks.training.config import TrainingConfig
-from .benchmarks.inference.config import InferenceConfig
-
+from .experiment import ExperimentConfig, launch
+from .launchers.inline.config import InlineConfig
+from .launchers.process.config import ProcessConfig
+from .launchers.torchrun.config import TorchrunConfig
LOGGER = getLogger("cli")
@@ -49,33 +46,17 @@
# optimum-benchmark
@hydra.main(version_base=None)
def benchmark_cli(experiment_config: DictConfig) -> None:
- os.environ["BENCHMARK_CLI"] = "1"
+ os.environ["BENCHMARK_INTERFACE"] = "CLI"
- if glob.glob("*.csv") and os.environ.get("OVERRIDE_BENCHMARKS", "0") != "1":
+ if glob.glob("benchmark_report.json") and os.environ.get("OVERRIDE_BENCHMARKS", "0") != "1":
LOGGER.warning(
- "Skipping benchmark because results already exist. "
- "Set OVERRIDE_BENCHMARKS=1 to override benchmark results."
+ "Benchmark report already exists. If you want to override it, set the environment variable OVERRIDE_BENCHMARKS=1"
)
return
- # fix backend until deprecated model and device are removed
- if experiment_config.task is not None:
- LOGGER.warning("`task` is deprecated in experiment. Use `backend.task` instead.")
- experiment_config.backend.task = experiment_config.task
- if experiment_config.model is not None:
- LOGGER.warning("`model` is deprecated in experiment. Use `backend.model` instead.")
- experiment_config.backend.model = experiment_config.model
- if experiment_config.device is not None:
- LOGGER.warning("`device` is deprecated in experiment. Use `backend.device` instead.")
- experiment_config.backend.device = experiment_config.device
- if experiment_config.library is not None:
- LOGGER.warning("`library` is deprecated in experiment. Use `backend.library` instead.")
- experiment_config.backend.library = experiment_config.library
-
# Instantiate the experiment configuration and trigger its __post_init__
experiment_config: ExperimentConfig = OmegaConf.to_object(experiment_config)
- OmegaConf.save(experiment_config, "experiment_config.yaml", resolve=True)
+ experiment_config.to_json("experiment_config.json")
benchmark_report: BenchmarkReport = launch(experiment_config=experiment_config)
-
benchmark_report.to_json("benchmark_report.json")
diff --git a/optimum_benchmark/env_utils.py b/optimum_benchmark/env_utils.py
deleted file mode 100644
index ed4b710b..00000000
--- a/optimum_benchmark/env_utils.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import os
-import re
-import platform
-import subprocess
-import importlib.util
-from typing import Optional, List
-
-from .import_utils import is_py3nvml_available, is_pyrsmi_available
-
-import psutil
-
-
-def is_nvidia_system():
- try:
- subprocess.check_output("nvidia-smi")
- return True
- except Exception:
- return False
-
-
-def is_rocm_system():
- try:
- subprocess.check_output("rocm-smi")
- return True
- except Exception:
- return False
-
-
-def bytes_to_mega_bytes(bytes: int) -> int:
- # MB, not MiB
- # Reference: https://en.wikipedia.org/wiki/Byte#Multiple-byte_units
- return int(bytes * 1e-6)
-
-
-def get_cpu() -> Optional[str]:
- if platform.system() == "Windows":
- return platform.processor()
-
- elif platform.system() == "Darwin":
- command = "sysctl -n machdep.cpu.brand_string"
- return str(subprocess.check_output(command, shell=True).decode().strip())
-
- elif platform.system() == "Linux":
- command = "cat /proc/cpuinfo"
- all_info = subprocess.check_output(command, shell=True).decode().strip()
- for line in all_info.split("\n"):
- if "model name" in line:
- return re.sub(".*model name.*:", "", line, 1)
- return "Could not find device name"
-
- else:
- raise ValueError(f"Unknown system '{platform.system()}'")
-
-
-def get_cpu_ram_mb():
- return bytes_to_mega_bytes(psutil.virtual_memory().total)
-
-
-def get_gpus():
- if is_nvidia_system():
- if not is_py3nvml_available():
- raise ValueError(
- "The library py3nvml is required to collect information on NVIDIA GPUs, but is not installed. "
- "Please install it through `pip install py3nvml`."
- )
- import py3nvml.py3nvml as nvml
-
- gpus = []
- nvml.nvmlInit()
- device_count = nvml.nvmlDeviceGetCount()
- for i in range(device_count):
- handle = nvml.nvmlDeviceGetHandleByIndex(i)
- gpus.append(nvml.nvmlDeviceGetName(handle))
- nvml.nvmlShutdown()
- elif is_rocm_system():
- if not is_pyrsmi_available():
- raise ValueError(
- "The library pyrsmi is required to collect information on ROCm-powered GPUs, but is not installed. "
- "Please install it following the instructions https://github.com/RadeonOpenCompute/pyrsmi."
- )
- from pyrsmi import rocml
-
- rocml.smi_initialize()
-
- device_count = rocml.smi_get_device_count()
-
- gpus = [rocml.smi_get_device_name(index) for index in range(device_count)]
- rocml.smi_shutdown()
- else:
- gpus = []
-
- return gpus
-
-
-def get_gpu_vram_mb() -> List[int]:
- if is_nvidia_system():
- if not is_py3nvml_available():
- raise ValueError(
- "The library py3nvml is required to collect information on NVIDIA GPUs, but is not installed. "
- "Please install it through `pip install py3nvml`."
- )
- import py3nvml.py3nvml as nvml
-
- nvml.nvmlInit()
- device_count = nvml.nvmlDeviceGetCount()
- vrams = [nvml.nvmlDeviceGetMemoryInfo(nvml.nvmlDeviceGetHandleByIndex(i)).total for i in range(device_count)]
- nvml.nvmlShutdown()
- elif is_rocm_system():
- if not is_pyrsmi_available():
- raise ValueError(
- "The library pyrsmi is required to collect information on ROCm-powered GPUs, but is not installed. "
- "Please install it following the instructions https://github.com/RadeonOpenCompute/pyrsmi."
- )
-
- from pyrsmi import rocml
-
- rocml.smi_initialize()
- device_count = rocml.smi_get_device_count()
- vrams = [rocml.smi_get_device_memory_total(index) for index in range(device_count)]
- rocml.smi_shutdown()
- else:
- vrams = []
-
- return sum(vrams)
-
-
-def get_cuda_device_ids() -> str:
- if os.environ.get("CUDA_VISIBLE_DEVICES", None) is not None:
- device_ids = os.environ["CUDA_VISIBLE_DEVICES"]
- else:
- if is_nvidia_system():
- if not is_py3nvml_available():
- raise ValueError(
- "The library py3nvml is required to collect information on NVIDIA GPUs, but is not installed. "
- "Please install it through `pip install py3nvml`."
- )
- import py3nvml.py3nvml as nvml
-
- nvml.nvmlInit()
- device_ids = list(range(nvml.nvmlDeviceGetCount()))
- nvml.nvmlShutdown()
- elif is_rocm_system():
- if not is_pyrsmi_available():
- raise ValueError(
- "The library pyrsmi is required to collect information on ROCm-powered GPUs, but is not installed. "
- "Please install it following the instructions https://github.com/RadeonOpenCompute/pyrsmi."
- )
-
- from pyrsmi import rocml
-
- rocml.smi_initialize()
- device_ids = list(range(rocml.smi_get_device_count()))
- rocml.smi_shutdown()
- else:
- raise ValueError("No NVIDIA or ROCm GPUs found.")
-
- return ",".join(str(i) for i in device_ids)
-
-
-def get_git_revision_hash(package_name: str) -> Optional[str]:
- """
- Returns the git commit SHA of a package installed from a git repository.
- """
-
- try:
- path = importlib.util.find_spec(package_name).origin
- except Exception:
- return None
-
- try:
- git_hash = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=path).decode().strip()
- except Exception:
- return None
-
- return git_hash
diff --git a/optimum_benchmark/experiment.py b/optimum_benchmark/experiment.py
index c9b6d733..c9a556cc 100644
--- a/optimum_benchmark/experiment.py
+++ b/optimum_benchmark/experiment.py
@@ -1,48 +1,38 @@
import os
-import platform
+from dataclasses import asdict, dataclass, field
from logging import getLogger
from tempfile import TemporaryDirectory
-from dataclasses import dataclass, field
-from typing import Any, Dict, Type, Optional, TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, Dict, Optional, Type, Union
-from hydra.utils import get_class
-
-from .benchmarks.report import BenchmarkReport
+from .backends.config import BackendConfig
from .benchmarks.config import BenchmarkConfig
+from .benchmarks.report import BenchmarkReport
+from .import_utils import get_hf_libs_info
from .launchers.config import LauncherConfig
-from .backends.config import BackendConfig
-from .import_utils import (
- transformers_version,
- accelerate_version,
- diffusers_version,
- optimum_version,
- timm_version,
- peft_version,
-)
-from .env_utils import (
- get_git_revision_hash,
- is_nvidia_system,
- is_rocm_system,
- get_gpu_vram_mb,
- get_cpu_ram_mb,
- get_gpus,
- get_cpu,
-)
+from .system_utils import get_system_info
if TYPE_CHECKING:
# avoid importing any torch to be able to set
# the CUDA_VISIBLE_DEVICES environment variable
# in BackendConfig __post_init__
+ from .backends.base import Backend
from .benchmarks.base import Benchmark
from .launchers.base import Launcher
- from .backends.base import Backend
+from json import dump
+
+import pandas as pd
+from flatten_dict import flatten
+from hydra.utils import get_class
+from transformers.configuration_utils import PushToHubMixin
LOGGER = getLogger("experiment")
+EXPERIMENT_FILE_NAME = "experiment_config.json"
+
@dataclass
-class ExperimentConfig:
+class ExperimentConfig(PushToHubMixin):
# BACKEND CONFIGURATION
backend: Any # https://github.com/facebookresearch/hydra/issues/1722#issuecomment-883568386
# LAUNCHER CONFIGURATION
@@ -59,39 +49,62 @@ class ExperimentConfig:
library: Optional[str] = None # deprecated
# ENVIRONMENT CONFIGURATION
- environment: Dict = field(
- default_factory=lambda: {
- "cpu": get_cpu(),
- "cpu_count": os.cpu_count(),
- "cpu_ram_mb": get_cpu_ram_mb(),
- "system": platform.system(),
- "python_version": platform.python_version(),
- # libraries
- "transformers_version": transformers_version(),
- "transformers_commit": get_git_revision_hash("transformers"),
- "accelerate_version": accelerate_version(),
- "accelerate_commit": get_git_revision_hash("accelerate"),
- "diffusers_version": diffusers_version(),
- "diffusers_commit": get_git_revision_hash("diffusers"),
- "optimum_version": optimum_version(),
- "optimum_commit": get_git_revision_hash("optimum"),
- "timm_version": timm_version(),
- "timm_commit": get_git_revision_hash("timm"),
- "peft_version": peft_version(),
- "peft_commit": get_git_revision_hash("peft"),
- }
- )
-
- def __post_init__(self):
- # adding GPU information to the environment
- if is_nvidia_system() or is_rocm_system():
- available_gpus = get_gpus()
- if len(available_gpus) > 0:
- self.environment["gpu"] = available_gpus[0]
- self.environment["gpu_count"] = len(available_gpus)
- self.environment["gpu_vram_mb"] = get_gpu_vram_mb()
- else:
- LOGGER.warning("Detected NVIDIA or ROCm system, but no GPUs found.")
+ environment: Dict = field(default_factory=lambda: {**get_system_info(), **get_hf_libs_info()})
+
+ def to_dict(self) -> Dict[str, Any]:
+ return asdict(self)
+
+ def to_flat_dict(self) -> Dict[str, Any]:
+ report_dict = self.to_dict()
+ return flatten(report_dict, reducer="dot")
+
+ def to_json(self, path: str, flat: bool = False) -> None:
+ if flat:
+ with open(path, "w") as f:
+ dump(self.to_flat_dict(), f, indent=4)
+ else:
+ with open(path, "w") as f:
+ dump(self.to_dict(), f, indent=4)
+
+ def to_dataframe(self) -> pd.DataFrame:
+ flat_report_dict = self.to_flat_dict()
+ return pd.DataFrame.from_dict(flat_report_dict, orient="index")
+
+ def to_csv(self, path: str) -> None:
+ self.to_dataframe().to_csv(path, index=False)
+
+ def save_pretrained(
+ self,
+ save_directory: Union[str, os.PathLike],
+ config_file_name: Optional[Union[str, os.PathLike]] = None,
+ push_to_hub: bool = False,
+ **kwargs,
+ ):
+ use_auth_token = kwargs.pop("use_auth_token", None)
+
+ if use_auth_token is not None:
+ kwargs["token"] = use_auth_token
+
+ config_file_name = config_file_name if config_file_name is not None else EXPERIMENT_FILE_NAME
+
+ if os.path.isfile(save_directory):
+ raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
+
+ os.makedirs(save_directory, exist_ok=True)
+
+ if push_to_hub:
+ commit_message = kwargs.pop("commit_message", None)
+ repo_id = kwargs.pop("repo_id", save_directory.split(os.path.sep)[-1])
+ repo_id = self._create_repo(repo_id, **kwargs)
+ files_timestamps = self._get_files_timestamps(save_directory)
+
+ output_config_file = os.path.join(save_directory, config_file_name)
+ self.to_json(output_config_file, flat=False)
+
+ if push_to_hub:
+ self._upload_modified_files(
+ save_directory, repo_id, files_timestamps, commit_message=commit_message, token=kwargs.get("token")
+ )
def run(benchmark_config: BenchmarkConfig, backend_config: BackendConfig) -> BenchmarkReport:
@@ -131,11 +144,27 @@ def run(benchmark_config: BenchmarkConfig, backend_config: BackendConfig) -> Ben
def launch(experiment_config: ExperimentConfig) -> BenchmarkReport:
- if os.environ.get("BENCHMARK_CLI", "0") == "0":
+ # fix backend until deprecated model and device are removed
+ if experiment_config.task is not None:
+ LOGGER.warning("`task` is deprecated in experiment config. Use `backend.task` instead.")
+ experiment_config.backend.task = experiment_config.task
+ if experiment_config.model is not None:
+ LOGGER.warning("`model` is deprecated in experiment config. Use `backend.model` instead.")
+ experiment_config.backend.model = experiment_config.model
+ if experiment_config.device is not None:
+ LOGGER.warning("`device` is deprecated in experiment config. Use `backend.device` instead.")
+ experiment_config.backend.device = experiment_config.device
+ if experiment_config.library is not None:
+ LOGGER.warning("`library` is deprecated in experiment config. Use `backend.library` instead.")
+ experiment_config.backend.library = experiment_config.library
+
+ original_dir = os.getcwd()
+ tmpdir = TemporaryDirectory()
+
+ if os.environ.get("BENCHMARK_INTERFACE", "API") == "API":
+ # to not pollute the user's environment
LOGGER.info("Launching experiment in a temporary directory.")
- tmep_dir = TemporaryDirectory()
- original_dir = os.getcwd()
- os.chdir(tmep_dir.name)
+ os.chdir(tmpdir.name)
launcher_config: LauncherConfig = experiment_config.launcher
@@ -145,6 +174,7 @@ def launch(experiment_config: ExperimentConfig) -> BenchmarkReport:
launcher: Launcher = launcher_factory(launcher_config)
except Exception as e:
LOGGER.error(f"Error during launcher allocation: {e}")
+ tmpdir.cleanup()
raise e
backend_config: BackendConfig = experiment_config.backend
@@ -154,10 +184,11 @@ def launch(experiment_config: ExperimentConfig) -> BenchmarkReport:
output = launcher.launch(run, benchmark_config, backend_config)
except Exception as e:
LOGGER.error(f"Error during experiment launching: {e}")
+ tmpdir.cleanup()
raise e
- if os.environ.get("BENCHMARK_CLI", "0") == "0":
+ if os.environ.get("BENCHMARK_INTERFACE", "API") == "API":
os.chdir(original_dir)
- tmep_dir.cleanup()
+ tmpdir.cleanup()
return output
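For context, a minimal sketch of driving `launch` from Python rather than through the CLI; the config classes are the ones imported in cli.py above, but the exact constructor arguments (e.g. `experiment_name`) are assumptions based on the rest of the patch, not a verbatim copy of examples/api_launch.py.

from optimum_benchmark.backends.pytorch.config import PyTorchConfig
from optimum_benchmark.benchmarks.inference.config import InferenceConfig
from optimum_benchmark.experiment import ExperimentConfig, launch
from optimum_benchmark.launchers.process.config import ProcessConfig

experiment_config = ExperimentConfig(
    experiment_name="api-launch",  # assumed field name
    backend=PyTorchConfig(model="bert-base-uncased", device="cpu"),
    launcher=ProcessConfig(),
    benchmark=InferenceConfig(memory=True),
)
# BENCHMARK_INTERFACE defaults to "API", so launch() chdirs into a TemporaryDirectory
# for the duration of the run and restores the original working directory afterwards.
benchmark_report = launch(experiment_config)
benchmark_report.to_json("benchmark_report.json")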
diff --git a/optimum_benchmark/generators/input_generator.py b/optimum_benchmark/generators/input_generator.py
index 13f1d9aa..0dfc3050 100644
--- a/optimum_benchmark/generators/input_generator.py
+++ b/optimum_benchmark/generators/input_generator.py
@@ -28,27 +28,17 @@ def __call__(self, mode: str) -> Dict[str, Any]:
if mode == "generate":
if "pixel_values" in task_input:
# image input
- task_input = {
- "inputs": task_input["pixel_values"],
- }
+ task_input = {"inputs": task_input["pixel_values"]}
elif "input_values" in task_input:
# speech input
- task_input = {
- "inputs": task_input["input_values"],
- }
+ task_input = {"inputs": task_input["input_values"]}
elif "input_features" in task_input:
# waveform input
- task_input = {
- "inputs": task_input["input_features"],
- }
+ task_input = {"inputs": task_input["input_features"]}
elif "input_ids" in task_input:
# text input
- task_input = {
- "inputs": task_input["input_ids"],
- }
+ task_input = {"inputs": task_input["input_ids"]}
elif mode == "call":
- task_input = {
- "prompt": task_input["prompt"],
- }
+ task_input = {"prompt": task_input["prompt"]}
return task_input
diff --git a/optimum_benchmark/generators/task_generator.py b/optimum_benchmark/generators/task_generator.py
index 1f3e9b23..683d8963 100644
--- a/optimum_benchmark/generators/task_generator.py
+++ b/optimum_benchmark/generators/task_generator.py
@@ -43,40 +43,28 @@ def input_ids(self):
return self.generate_random_integers(
min_value=0,
max_value=self.shapes["vocab_size"],
- shape=(
- self.shapes["batch_size"],
- self.shapes["sequence_length"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["sequence_length"]),
)
def attention_mask(self):
return self.generate_random_integers(
min_value=1, # avoid sparse attention
max_value=2,
- shape=(
- self.shapes["batch_size"],
- self.shapes["sequence_length"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["sequence_length"]),
)
def token_type_ids(self):
return self.generate_random_integers(
min_value=0,
max_value=self.shapes["type_vocab_size"],
- shape=(
- self.shapes["batch_size"],
- self.shapes["sequence_length"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["sequence_length"]),
)
def position_ids(self):
return self.generate_ranges(
start=0,
stop=self.shapes["sequence_length"],
- shape=(
- self.shapes["batch_size"],
- self.shapes["sequence_length"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["sequence_length"]),
)
def requires_token_type_ids(self):
@@ -91,44 +79,28 @@ def pixel_values(self):
return self.generate_random_floats(
min_value=0,
max_value=1,
- shape=(
- self.shapes["batch_size"],
- self.shapes["num_channels"],
- self.shapes["height"],
- self.shapes["width"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["num_channels"], self.shapes["height"], self.shapes["width"]),
)
class AudioGenerator(TaskGenerator):
def input_values(self):
return self.generate_random_floats(
- min_value=-1,
- max_value=1,
- shape=(
- self.shapes["batch_size"],
- self.shapes["sequence_length"],
- ),
+ min_value=-1, max_value=1, shape=(self.shapes["batch_size"], self.shapes["sequence_length"])
)
def input_features(self):
return self.generate_random_floats(
min_value=-1,
max_value=1,
- shape=(
- self.shapes["batch_size"],
- self.shapes["feature_size"],
- self.shapes["nb_max_frames"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["feature_size"], self.shapes["nb_max_frames"]),
)
class TextClassificationGenerator(TextGenerator):
def labels(self):
return self.generate_random_integers(
- min_value=0,
- max_value=self.shapes["num_labels"],
- shape=(self.shapes["batch_size"],),
+ min_value=0, max_value=self.shapes["num_labels"], shape=(self.shapes["batch_size"],)
)
def __call__(self):
@@ -154,10 +126,7 @@ def labels(self):
return self.generate_random_integers(
min_value=0,
max_value=self.shapes["num_labels"],
- shape=(
- self.shapes["batch_size"],
- self.shapes["sequence_length"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["sequence_length"]),
)
def __call__(self):
@@ -199,16 +168,12 @@ def __call__(self):
class QuestionAnsweringGenerator(TextGenerator):
def start_positions(self):
return self.generate_random_integers(
- min_value=0,
- max_value=self.shapes["sequence_length"],
- shape=(self.shapes["batch_size"],),
+ min_value=0, max_value=self.shapes["sequence_length"], shape=(self.shapes["batch_size"],)
)
def end_positions(self):
return self.generate_random_integers(
- min_value=0,
- max_value=self.shapes["sequence_length"],
- shape=(self.shapes["batch_size"],),
+ min_value=0, max_value=self.shapes["sequence_length"], shape=(self.shapes["batch_size"],)
)
def __call__(self):
@@ -247,9 +212,7 @@ def __call__(self):
class MultipleChoiceGenerator(TextGenerator):
def labels(self):
return self.generate_random_integers(
- min_value=0,
- max_value=self.shapes["num_choices"],
- shape=(self.shapes["batch_size"],),
+ min_value=0, max_value=self.shapes["num_choices"], shape=(self.shapes["batch_size"],)
)
def __call__(self):
@@ -283,9 +246,7 @@ def __call__(self):
class ImageClassificationGenerator(ImageGenerator):
def labels(self):
return self.generate_random_integers(
- min_value=0,
- max_value=self.shapes["num_labels"],
- shape=(self.shapes["batch_size"],),
+ min_value=0, max_value=self.shapes["num_labels"], shape=(self.shapes["batch_size"],)
)
def __call__(self):
@@ -303,15 +264,9 @@ def labels(self):
return [
{
"class_labels": self.generate_random_integers(
- min_value=0,
- max_value=self.shapes["num_labels"],
- shape=(self.shapes["num_queries"],),
- ),
- "boxes": self.generate_random_floats(
- min_value=-1,
- max_value=1,
- shape=(self.shapes["num_queries"], 4),
+ min_value=0, max_value=self.shapes["num_labels"], shape=(self.shapes["num_queries"],)
),
+ "boxes": self.generate_random_floats(min_value=-1, max_value=1, shape=(self.shapes["num_queries"], 4)),
}
for _ in range(self.shapes["batch_size"])
]
@@ -331,11 +286,7 @@ def labels(self):
return self.generate_random_integers(
min_value=0,
max_value=self.shapes["num_labels"],
- shape=(
- self.shapes["batch_size"],
- self.shapes["height"],
- self.shapes["width"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["height"], self.shapes["width"]),
)
def __call__(self):
@@ -351,9 +302,7 @@ def __call__(self):
class AudioClassificationGenerator(AudioGenerator):
def labels(self):
return self.generate_random_integers(
- min_value=0,
- max_value=self.shapes["num_labels"],
- shape=(self.shapes["batch_size"],),
+ min_value=0, max_value=self.shapes["num_labels"], shape=(self.shapes["batch_size"],)
)
def __call__(self):
@@ -371,10 +320,7 @@ def labels(self):
return self.generate_random_integers(
min_value=0,
max_value=self.shapes["vocab_size"],
- shape=(
- self.shapes["batch_size"],
- self.shapes["sequence_length"],
- ),
+ shape=(self.shapes["batch_size"], self.shapes["sequence_length"]),
)
def __call__(self):
diff --git a/optimum_benchmark/import_utils.py b/optimum_benchmark/import_utils.py
index f19fbda3..f247eaf3 100644
--- a/optimum_benchmark/import_utils.py
+++ b/optimum_benchmark/import_utils.py
@@ -1,6 +1,7 @@
import importlib.metadata
import importlib.util
-
+import subprocess
+from typing import Optional
_transformers_available = importlib.util.find_spec("transformers") is not None
_accelerate_available = importlib.util.find_spec("accelerate") is not None
@@ -10,12 +11,11 @@
_onnx_available = importlib.util.find_spec("onnx") is not None
_tensorrt_available = importlib.util.find_spec("tensorrt") is not None
_peft_available = importlib.util.find_spec("peft") is not None
-_py3nvml_available = importlib.util.find_spec("py3nvml") is not None
+_pynvml_available = importlib.util.find_spec("pynvml") is not None
_torch_distributed_available = importlib.util.find_spec("torch.distributed") is not None
_onnxruntime_available = importlib.util.find_spec("onnxruntime") is not None
_openvino_available = importlib.util.find_spec("openvino") is not None
_neural_compressor_available = importlib.util.find_spec("neural_compressor") is not None
-_pyrsmi_available = importlib.util.find_spec("pyrsmi") is not None
_codecarbon_available = importlib.util.find_spec("codecarbon") is not None
_amdsmi_available = importlib.util.find_spec("amdsmi") is not None
_tensorflow_available = importlib.util.find_spec("tensorflow") is not None
@@ -25,6 +25,7 @@
_deepspeed_available = importlib.util.find_spec("deepspeed") is not None
_tensorrt_llm_available = importlib.util.find_spec("tensorrt_llm") is not None
_psutil_available = importlib.util.find_spec("psutil") is not None
+_optimum_benchmark_available = importlib.util.find_spec("optimum_benchmark") is not None
def is_psutil_available():
@@ -83,12 +84,8 @@ def is_onnxruntime_available():
return _onnxruntime_available
-def is_py3nvml_available():
- return _py3nvml_available
-
-
-def is_pyrsmi_available():
- return _pyrsmi_available
+def is_pynvml_available():
+ return _pynvml_available
def is_amdsmi_available():
@@ -178,3 +175,45 @@ def peft_version():
def tesnorrt_llm_version():
if _tensorrt_llm_available:
return importlib.metadata.version("tensorrt_llm")
+
+
+def optimum_benchmark_version():
+ if _optimum_benchmark_available:
+ return importlib.metadata.version("optimum_benchmark")
+
+
+def get_git_revision_hash(package_name: str) -> Optional[str]:
+ """
+ Returns the git commit SHA of a package installed from a git repository.
+ """
+
+ try:
+ path = importlib.util.find_spec(package_name).origin
+ except Exception:
+ return None
+
+ try:
+ git_hash = subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=path).decode().strip()
+ except Exception:
+ return None
+
+ return git_hash
+
+
+def get_hf_libs_info():
+ return {
+ "optimum_benchmark_version": optimum_benchmark_version(),
+ "optimum_benchmark_commit": get_git_revision_hash("optimum_benchmark"),
+ "transformers_version": transformers_version(),
+ "transformers_commit": get_git_revision_hash("transformers"),
+ "accelerate_version": accelerate_version(),
+ "accelerate_commit": get_git_revision_hash("accelerate"),
+ "diffusers_version": diffusers_version(),
+ "diffusers_commit": get_git_revision_hash("diffusers"),
+ "optimum_version": optimum_version(),
+ "optimum_commit": get_git_revision_hash("optimum"),
+ "timm_version": timm_version(),
+ "timm_commit": get_git_revision_hash("timm"),
+ "peft_version": peft_version(),
+ "peft_commit": get_git_revision_hash("peft"),
+ }
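For reference, a short sketch of how these helpers feed the experiment's `environment` field (see the experiment.py hunk above); the values in the comments are illustrative and depend on the local install.

from optimum_benchmark.import_utils import get_hf_libs_info
from optimum_benchmark.system_utils import get_system_info

environment = {**get_system_info(), **get_hf_libs_info()}
# e.g. environment["transformers_version"] -> whatever version is installed locally
#      environment["transformers_commit"]  -> None for a PyPI install (no git metadata)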
diff --git a/optimum_benchmark/launchers/base.py b/optimum_benchmark/launchers/base.py
index 91b50da0..4d5323f4 100644
--- a/optimum_benchmark/launchers/base.py
+++ b/optimum_benchmark/launchers/base.py
@@ -1,7 +1,8 @@
from abc import ABC
from logging import getLogger
-from typing import Callable, ClassVar, Generic, Dict, Any
+from typing import Callable, ClassVar, Generic
+from ..benchmarks.report import BenchmarkReport
from .config import LauncherConfigT
LOGGER = getLogger("launcher")
@@ -16,5 +17,5 @@ def __init__(self, config: LauncherConfigT):
LOGGER.info(f"ููAllocating {self.NAME} launcher")
self.config = config
- def launch(self, worker: Callable, *worker_args) -> Dict[str, Any]:
+ def launch(self, worker: Callable, *worker_args) -> BenchmarkReport:
raise NotImplementedError("Launcher must implement launch method")
diff --git a/optimum_benchmark/launchers/config.py b/optimum_benchmark/launchers/config.py
index 2d04caa4..938c3c97 100644
--- a/optimum_benchmark/launchers/config.py
+++ b/optimum_benchmark/launchers/config.py
@@ -1,7 +1,7 @@
from abc import ABC
-from typing import TypeVar
-from logging import getLogger
from dataclasses import dataclass
+from logging import getLogger
+from typing import TypeVar
LOGGER = getLogger("launcher")
diff --git a/optimum_benchmark/launchers/inline/launcher.py b/optimum_benchmark/launchers/inline/launcher.py
index e5702ba1..64a8002c 100644
--- a/optimum_benchmark/launchers/inline/launcher.py
+++ b/optimum_benchmark/launchers/inline/launcher.py
@@ -1,10 +1,10 @@
-import os
from logging import getLogger
-from typing import Callable, Dict, Any
+from typing import Callable
+from ...benchmarks.report import BenchmarkReport
from ..base import Launcher
-from .config import InlineConfig
from ..isolation_utils import device_isolation
+from .config import InlineConfig
LOGGER = getLogger("inline")
@@ -15,12 +15,9 @@ class InlineLauncher(Launcher[InlineConfig]):
def __init__(self, config: InlineConfig):
super().__init__(config)
- def launch(self, worker: Callable, *worker_args) -> Dict[str, Any]:
- with device_isolation(
- benchmark_pid=os.getpid(),
- enabled=self.config.device_isolation,
- ):
- LOGGER.info("\t+ Launching inline experiment (no process isolation)")
- report: Dict[str, Any] = worker(*worker_args)
+ def launch(self, worker: Callable, *worker_args) -> BenchmarkReport:
+ with device_isolation(enabled=self.config.device_isolation):
+ LOGGER.info("\t+ Launching inline worker (no process isolation)")
+ report = worker(*worker_args)
return report
diff --git a/optimum_benchmark/launchers/isolation_utils.py b/optimum_benchmark/launchers/isolation_utils.py
index 52006bcc..f8a0074c 100644
--- a/optimum_benchmark/launchers/isolation_utils.py
+++ b/optimum_benchmark/launchers/isolation_utils.py
@@ -1,61 +1,64 @@
import os
-import time
import signal
-from typing import Dict, Set
+import time
+from contextlib import contextmanager
from logging import getLogger
from multiprocessing import Process
-from contextlib import contextmanager
+from typing import Dict, Set
+from ..import_utils import is_amdsmi_available, is_psutil_available, is_pynvml_available
from ..logging_utils import setup_logging
-from ..env_utils import is_nvidia_system, is_rocm_system
-from ..import_utils import is_amdsmi_available, is_py3nvml_available, torch_version, is_psutil_available
+from ..system_utils import get_rocm_version, is_nvidia_system, is_rocm_system
if is_psutil_available():
import psutil
-if is_py3nvml_available():
- import py3nvml.py3nvml as nvml
+if is_pynvml_available():
+ import pynvml
if is_amdsmi_available():
- import amdsmi # type: ignore
+ import amdsmi
LOGGER = getLogger("isolation")
def get_nvidia_devices_pids() -> Dict[int, list]:
+ if not is_pynvml_available():
+ raise ValueError(
+ "The library pynvml is required to get the pids running on NVIDIA GPUs, but is not installed. "
+ "Please install the official and NVIDIA maintained PyNVML library through `pip install nvidia-ml-py`."
+ )
+
devices_pids: Dict[int, list] = {}
devices_ids = [int(device_id) for device_id in os.environ["CUDA_VISIBLE_DEVICES"].split(",")]
- if not is_py3nvml_available():
- raise ValueError("get_nvidia_device_pids requires py3nvml. Please install it with `pip install py3nvml`.")
-
- nvml.nvmlInit()
+ pynvml.nvmlInit()
for device_id in devices_ids:
- device_handle = nvml.nvmlDeviceGetHandleByIndex(device_id)
- device_processes = nvml.nvmlDeviceGetComputeRunningProcesses(device_handle)
+ device_handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
+ device_processes = pynvml.nvmlDeviceGetComputeRunningProcesses(device_handle)
for device_process in device_processes:
if device_id not in devices_pids:
devices_pids[device_id] = []
devices_pids[device_id].append(device_process.pid)
- nvml.nvmlShutdown()
+ pynvml.nvmlShutdown()
return devices_pids
def get_amd_devices_pids() -> Dict[int, list]:
- devices_pids: Dict[int, list] = {}
- rocm_version = torch_version().split("rocm")[-1]
- devices_ids = [int(device_id) for device_id in os.environ["CUDA_VISIBLE_DEVICES"].split(",")]
-
if not is_amdsmi_available():
raise ValueError(
- "get_amd_devices_pids requires amdsmi. "
- "Please follow the instructions at https://github.com/RadeonOpenCompute/amdsmi/tree/master"
+ "The library amdsmi is required get the pids running on AMD GPUs, but is not installed. "
+ "Please install the official and AMD maintained amdsmi library from https://github.com/ROCm/amdsmi."
)
+ devices_pids: Dict[int, list] = {}
+ rocm_version = get_rocm_version()
+ devices_ids = [int(device_id) for device_id in os.environ["CUDA_VISIBLE_DEVICES"].split(",")]
+
amdsmi.amdsmi_init()
if rocm_version >= "5.7":
@@ -115,7 +118,6 @@ def get_amd_devices_pids() -> Dict[int, list]:
def get_pids_running_on_system_device() -> Set[int]:
"""Returns the set of pids running on the system device(s)."""
-
if is_nvidia_system():
devices_pids = get_nvidia_devices_pids()
elif is_rocm_system():
@@ -128,29 +130,28 @@ def get_pids_running_on_system_device() -> Set[int]:
return all_devices_pids
-def assert_system_devices_isolation(benchmark_pid: int) -> None:
+def assert_system_devices_isolation(main_pid: int) -> None:
setup_logging("ERROR")
-
isolation_pid = os.getpid()
- while psutil.pid_exists(benchmark_pid):
+ while psutil.pid_exists(main_pid):
child_processes = set()
non_permitted_pids = set()
all_devices_pids = get_pids_running_on_system_device()
for pid in list(all_devices_pids):
- if pid == benchmark_pid or pid == isolation_pid:
+ if pid == main_pid or pid == isolation_pid:
continue
try:
info = psutil.Process(pid)
parent_pid = info.ppid()
except Exception as e:
- LOGGER.error(f"Failed to get info for process {pid} with error {e}")
+ LOGGER.error(f"Failed to get parent pid for process {pid} with error {e}")
parent_pid = None
- if parent_pid == benchmark_pid or parent_pid == isolation_pid:
+ if parent_pid == main_pid or parent_pid == isolation_pid:
child_processes.add(pid)
else:
non_permitted_pids.add(pid)
@@ -159,29 +160,25 @@ def assert_system_devices_isolation(benchmark_pid: int) -> None:
LOGGER.error(f"Found non-permitted process(es) running on system device(s): {non_permitted_pids}")
for pid in child_processes:
try:
- LOGGER.error(f"Terminating child process {pid}")
- os.kill(pid, signal.SIGTERM)
+ LOGGER.error(f"Interrupting child process {pid} of main process {main_pid}")
+ os.kill(pid, signal.SIGINT)
except Exception as e:
LOGGER.error(f"Failed to terminate child process {pid} with error {e}")
- LOGGER.error(f"Terminating benchmark process {benchmark_pid}")
- os.kill(benchmark_pid, signal.SIGTERM)
- break
+ LOGGER.error(f"Interrupting main process {main_pid}...")
+ os.kill(main_pid, signal.SIGINT)
+ exit(1)
time.sleep(1)
@contextmanager
-def device_isolation(benchmark_pid: int, enabled: bool):
+def device_isolation(enabled: bool):
if not enabled:
yield
return
- isolation_process = Process(
- target=assert_system_devices_isolation,
- kwargs={"benchmark_pid": benchmark_pid},
- daemon=True,
- )
+ isolation_process = Process(target=assert_system_devices_isolation, kwargs={"main_pid": os.getpid()}, daemon=True)
isolation_process.start()
LOGGER.info(f"\t+ Launched device(s) isolation process {isolation_process.pid}.")
diff --git a/optimum_benchmark/launchers/process/launcher.py b/optimum_benchmark/launchers/process/launcher.py
index b2619d2f..c08061a5 100644
--- a/optimum_benchmark/launchers/process/launcher.py
+++ b/optimum_benchmark/launchers/process/launcher.py
@@ -1,13 +1,13 @@
-import os
-import multiprocessing as mp
from logging import getLogger
-from typing import Callable, Dict, Any
-from multiprocessing import Process, Queue
+from typing import Callable
-from ..isolation_utils import device_isolation
+import torch.multiprocessing as mp
+
+from ...benchmarks.report import BenchmarkReport
from ...logging_utils import setup_logging
-from .config import ProcessConfig
from ..base import Launcher
+from ..isolation_utils import device_isolation
+from .config import ProcessConfig
LOGGER = getLogger("process")
@@ -22,35 +22,44 @@ def __init__(self, config: ProcessConfig):
LOGGER.info(f"\t+ Setting multiprocessing start method to {self.config.start_method}.")
mp.set_start_method(self.config.start_method, force=True)
- def launch(self, worker: Callable, *worker_args) -> Dict[str, Any]:
- # worker process can't be daemon since it might spawn its own processes
- queue = Queue()
- current_log_level = getLogger().getEffectiveLevel()
- worker_process = Process(
- daemon=False,
- target=target,
- args=(worker, queue, current_log_level, *worker_args),
- )
- worker_process.start()
- LOGGER.info(f"\t+ Launched worker process with PID {worker_process.pid}.")
+ def launch(self, worker: Callable, *worker_args) -> BenchmarkReport:
+ log_level = getLogger().getEffectiveLevel()
+
+ ctx = mp.get_context(self.config.start_method)
+ queue = ctx.Queue()
+ lock = ctx.Lock()
- with device_isolation(enabled=self.config.device_isolation, benchmark_pid=os.getpid()):
- worker_process.join()
+ with device_isolation(enabled=self.config.device_isolation):
+ process_context = mp.start_processes(
+ entrypoint,
+ args=(worker, queue, lock, log_level, *worker_args),
+ start_method=self.config.start_method,
+ daemon=False,
+ join=False,
+ nprocs=1,
+ )
+ LOGGER.info(f"\t+ Launched worker process(es) with PID(s): {process_context.pids()}")
+ while not process_context.join():
+ pass
- if worker_process.exitcode != 0:
- LOGGER.error(f"\t+ Worker process exited with code {worker_process.exitcode}, forwarding...")
- exit(worker_process.exitcode)
+ # restore the original logging configuration
+ setup_logging(log_level)
- report = queue.get()
+ report: BenchmarkReport = queue.get()
return report
-def target(fn, q, log_level, *args):
- """This a pickalable function that correctly sets up the logging configuration for the worker process."""
+def entrypoint(i, worker, queue, lock, log_level, *worker_args):
+ """
+    This is a picklable function that correctly sets up the logging configuration for the worker process,
+ and puts the output of the worker function into a lock-protected queue.
+ """
- setup_logging(log_level)
+ setup_logging(log_level, prefix=f"PROC-{i}")
- out = fn(*args)
+ worker_output = worker(*worker_args)
- q.put(out)
+ lock.acquire()
+ queue.put(worker_output)
+ lock.release()
diff --git a/optimum_benchmark/launchers/torchrun/config.py b/optimum_benchmark/launchers/torchrun/config.py
index 2d87ff03..c1fbfc38 100644
--- a/optimum_benchmark/launchers/torchrun/config.py
+++ b/optimum_benchmark/launchers/torchrun/config.py
@@ -1,7 +1,7 @@
import uuid
+from dataclasses import dataclass, field
from logging import getLogger
from typing import Any, Dict, Optional
-from dataclasses import dataclass, field
from ..config import LauncherConfig
diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py
index f327e85c..d5351a34 100644
--- a/optimum_benchmark/launchers/torchrun/launcher.py
+++ b/optimum_benchmark/launchers/torchrun/launcher.py
@@ -1,23 +1,17 @@
-import os
-import multiprocessing as mp
from logging import getLogger
-from multiprocessing import Queue
-from typing import Callable, Dict, Any
+from typing import Any, Callable, Dict, List
-from ..base import Launcher
-from .config import TorchrunConfig
+import torch.distributed
+import torch.multiprocessing as mp
+from torch.distributed.elastic.multiprocessing import Std
+from torch.distributed.elastic.multiprocessing.errors import record
+from torch.distributed.launcher.api import LaunchConfig, launch_agent
+
+from ...benchmarks.report import BenchmarkReport
from ...logging_utils import setup_logging
+from ..base import Launcher
from ..isolation_utils import device_isolation
-from ...benchmarks.report import BenchmarkReport
-from ...import_utils import is_torch_distributed_available
-
-if is_torch_distributed_available():
- import torch.distributed
- from torch.distributed import FileStore
- from torch.distributed.elastic.multiprocessing import Std
- from torch.distributed.elastic.multiprocessing.errors import record
- from torch.distributed.launcher.api import LaunchConfig, launch_agent
-
+from .config import TorchrunConfig
LOGGER = getLogger("torchrun")
@@ -33,6 +27,7 @@ def __init__(self, config: TorchrunConfig):
mp.set_start_method(self.config.start_method, force=True)
def launch(self, worker: Callable, *worker_args) -> Dict[str, Any]:
+ log_level = getLogger().getEffectiveLevel()
launch_config = LaunchConfig(
min_nodes=self.config.min_nodes,
max_nodes=self.config.max_nodes,
@@ -51,55 +46,51 @@ def launch(self, worker: Callable, *worker_args) -> Dict[str, Any]:
local_addr=self.config.local_addr,
log_dir=self.config.log_dir,
)
- queue = Queue()
- current_log_level = getLogger().getEffectiveLevel()
- with device_isolation(enabled=self.config.device_isolation, benchmark_pid=os.getpid()):
+ ctx = mp.get_context(self.config.start_method)
+ queue = ctx.Queue()
+ lock = ctx.Lock()
+
+ with device_isolation(enabled=self.config.device_isolation):
LOGGER.info(f"\t+ Launching torchrun agent with {self.config.nproc_per_node} workers processes")
launch_agent(
- config=launch_config,
- entrypoint=entrypoint,
- args=(worker, queue, current_log_level, *worker_args),
+ entrypoint=entrypoint, args=(worker, queue, lock, log_level, *worker_args), config=launch_config
)
- outputs = []
+ # restore the original logging configuration
+ setup_logging(log_level)
+ reports: List[BenchmarkReport] = []
while not queue.empty():
- outputs.append(queue.get())
+ reports.append(queue.get())
- if len(outputs) == 1:
- report: BenchmarkReport = outputs[0]
+ if len(reports) > 1:
+ LOGGER.info(f"\t+ Merging benchmark reports from {len(reports)} workers")
+ report = reports[0].aggregate(reports)
+ elif len(reports) == 1:
+ report = reports[0]
else:
- LOGGER.info(f"\t+ Merging benchmark reports from {len(outputs)} workers")
- report: BenchmarkReport = sum(outputs[1:], outputs[0])
- report.log_all()
+ raise ValueError("No benchmark report was returned by the workers")
+
+ report.log()
return report
@record
-def entrypoint(fn, q, log_level, *args):
+def entrypoint(worker, queue, lock, log_level, *worker_args):
"""
    This is a picklable function that correctly sets up the logging configuration
"""
- if not torch.distributed.is_initialized():
- # initialize the process group if not already initialized
- backend = "nccl" if torch.cuda.is_available() else "gloo"
- torch.distributed.init_process_group(backend=backend)
- rank = torch.distributed.get_rank()
-
- if torch.cuda.is_available():
- torch.cuda.set_device(rank)
+ torch.distributed.init_process_group(backend="nccl" if torch.cuda.is_available() else "gloo")
- if rank == 0:
- setup_logging(level=log_level, prefix="RANK-0")
- else:
- setup_logging(level="ERROR")
+ rank = torch.distributed.get_rank()
+    if torch.cuda.is_available():
+        torch.cuda.set_device(rank)
+    if rank == 0:
+        setup_logging(level=log_level, prefix=f"RANK-{rank}")
- # TODO: use a tcp store instead
- store = FileStore("torchrun.filestore")
- store.set(f"rank_{rank}", str(os.getpid()))
+ output = worker(*worker_args)
- output = fn(*args)
- q.put(output)
+ lock.acquire()
+ queue.put(output)
+ lock.release()
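
A standalone sketch of the per-rank pattern the new torchrun entrypoint follows (initialize the process group, pin the rank to its device, set up verbose logging only on the main rank); the single-process gloo rendezvous below exists only to make the sketch runnable on one machine:

    import os

    import torch
    import torch.distributed as dist

    # single-process rendezvous values, for illustration only
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ.setdefault("MASTER_PORT", "29500")
    os.environ.setdefault("RANK", "0")
    os.environ.setdefault("WORLD_SIZE", "1")

    dist.init_process_group(backend="nccl" if torch.cuda.is_available() else "gloo")
    rank = dist.get_rank()
    if torch.cuda.is_available():
        torch.cuda.set_device(rank)
    if rank == 0:
        print(f"[RANK-{rank}] only the main rank logs verbosely")
    dist.destroy_process_group()
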
diff --git a/optimum_benchmark/logging_utils.py b/optimum_benchmark/logging_utils.py
index 72f76889..c4c5ab6a 100644
--- a/optimum_benchmark/logging_utils.py
+++ b/optimum_benchmark/logging_utils.py
@@ -1,9 +1,9 @@
-import os
import logging
import logging.config
+import os
from logging import Logger
+from subprocess import PIPE, STDOUT, Popen
from typing import Optional
-from subprocess import Popen, PIPE, STDOUT
from omegaconf import OmegaConf
@@ -14,34 +14,19 @@
"colorlog": {
"()": "colorlog.ColoredFormatter",
"format": "[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - %(message)s",
- "log_colors": {
- "DEBUG": "purple",
- "INFO": "green",
- "WARNING": "yellow",
- "CRITICAL": "red",
- "ERROR": "red",
- },
- },
- },
- "handlers": {
- "console": {
- "formatter": "colorlog",
- "stream": "ext://sys.stdout",
- "class": "logging.StreamHandler",
+ "log_colors": {"DEBUG": "purple", "INFO": "green", "WARNING": "yellow", "CRITICAL": "red", "ERROR": "red"},
},
},
+ "handlers": {"console": {"formatter": "colorlog", "stream": "ext://sys.stdout", "class": "logging.StreamHandler"}},
"root": {"level": "INFO", "handlers": ["console"]},
"disable_existing_loggers": False,
}
def setup_logging(level: str = "INFO", prefix: Optional[str] = None):
- if os.environ.get("BENCHMARK_CLI", "0") == "1":
+ if os.environ.get("BENCHMARK_INTERFACE", "API") == "CLI":
hydra_config = OmegaConf.load(".hydra/hydra.yaml")
- job_logging = OmegaConf.to_container(
- hydra_config.hydra.job_logging,
- resolve=True,
- )
+ job_logging = OmegaConf.to_container(hydra_config.hydra.job_logging, resolve=True)
else:
job_logging = API_JOB_LOGGING.copy()
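
A small usage sketch (not part of the patch): when BENCHMARK_INTERFACE is unset or anything other than "CLI", setup_logging applies API_JOB_LOGGING, and workers can tag their records with an optional prefix:

    from logging import getLogger

    from optimum_benchmark.logging_utils import setup_logging

    setup_logging(level="INFO", prefix="PROC-0")  # prefix is optional
    getLogger("example").info("worker started")
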
diff --git a/optimum_benchmark/system_utils.py b/optimum_benchmark/system_utils.py
new file mode 100644
index 00000000..52d59383
--- /dev/null
+++ b/optimum_benchmark/system_utils.py
@@ -0,0 +1,219 @@
+import os
+import platform
+import re
+import subprocess
+from typing import Optional
+
+import psutil
+
+from .import_utils import is_amdsmi_available, is_pynvml_available
+
+
+## CPU related stuff
+def get_cpu() -> Optional[str]:
+ if platform.system() == "Windows":
+ return platform.processor()
+
+ elif platform.system() == "Darwin":
+ command = "sysctl -n machdep.cpu.brand_string"
+ return str(subprocess.check_output(command, shell=True).decode().strip())
+
+ elif platform.system() == "Linux":
+ command = "cat /proc/cpuinfo"
+ all_info = subprocess.check_output(command, shell=True).decode().strip()
+ for line in all_info.split("\n"):
+ if "model name" in line:
+ return re.sub(".*model name.*:", "", line, 1)
+ return "Could not find device name"
+
+ else:
+ raise ValueError(f"Unknown system '{platform.system()}'")
+
+
+def get_cpu_ram_mb():
+ return psutil.virtual_memory().total / 1e6
+
+
+## GPU related stuff
+try:
+ subprocess.check_output("nvidia-smi")
+ _nvidia_system = True
+except Exception:
+ _nvidia_system = False
+
+try:
+ subprocess.check_output("rocm-smi")
+ _rocm_system = True
+except Exception:
+ _rocm_system = False
+
+
+def is_nvidia_system():
+ return _nvidia_system
+
+
+def is_rocm_system():
+ return _rocm_system
+
+
+if is_nvidia_system() and is_pynvml_available():
+ import pynvml
+
+if is_rocm_system() and is_amdsmi_available():
+ import amdsmi
+
+
+def get_rocm_version():
+ for folder in os.listdir("/opt/"):
+ if "rocm" in folder and "rocm" != folder:
+ return folder.split("-")[-1]
+ raise ValueError("Could not find ROCm version.")
+
+
+def get_gpus():
+ if is_nvidia_system():
+ if not is_pynvml_available():
+ raise ValueError(
+ "The library pynvml is required to run memory benchmark on NVIDIA GPUs, but is not installed. "
+ "Please install the official and NVIDIA maintained PyNVML library through `pip install nvidia-ml-py`."
+ )
+
+ gpus = []
+ pynvml.nvmlInit()
+ device_count = pynvml.nvmlDeviceGetCount()
+ for i in range(device_count):
+ handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+ gpus.append(pynvml.nvmlDeviceGetName(handle))
+ pynvml.nvmlShutdown()
+ elif is_rocm_system():
+ if not is_amdsmi_available():
+ raise ValueError(
+ "The library amdsmi is required to run memory benchmark on AMD GPUs, but is not installed. "
+ "Please install the official and AMD maintained amdsmi library from https://github.com/ROCm/amdsmi."
+ )
+
+ gpus = []
+ amdsmi.amdsmi_init()
+ rocm_version = get_rocm_version()
+ if rocm_version >= "5.7":
+ devices_handles = amdsmi.amdsmi_get_processor_handles()
+ for device_handle in devices_handles:
+ gpus.append(amdsmi.amdsmi_get_gpu_vendor_name(device_handle))
+ else:
+ devices_handles = amdsmi.amdsmi_get_device_handles()
+ for device_handle in devices_handles:
+ gpus.append(amdsmi.amdsmi_dev_get_vendor_name(device_handle))
+ amdsmi.amdsmi_shut_down()
+ else:
+ raise ValueError("No NVIDIA or ROCm GPUs found.")
+
+ return gpus
+
+
+def get_gpu_vram_mb() -> int:
+ if is_nvidia_system():
+ if not is_pynvml_available():
+ raise ValueError(
+ "The library pynvml is required to run memory benchmark on NVIDIA GPUs, but is not installed. "
+ "Please install the official and NVIDIA maintained PyNVML library through `pip install nvidia-ml-py`."
+ )
+
+ pynvml.nvmlInit()
+ device_count = pynvml.nvmlDeviceGetCount()
+ vrams = [
+ pynvml.nvmlDeviceGetMemoryInfo(pynvml.nvmlDeviceGetHandleByIndex(i)).total for i in range(device_count)
+ ]
+ pynvml.nvmlShutdown()
+ elif is_rocm_system():
+ if not is_amdsmi_available():
+ raise ValueError(
+ "The library amdsmi is required to run memory benchmark on AMD GPUs, but is not installed. "
+ "Please install the official and AMD maintained amdsmi library from https://github.com/ROCm/amdsmi."
+ )
+
+ amdsmi.amdsmi_init()
+ rocm_version = get_rocm_version()
+
+ if rocm_version >= "5.7":
+ device_handles = amdsmi.amdsmi_get_processor_handles()
+ vrams = [
+ amdsmi.amdsmi_get_gpu_memory_total(device_handle, mem_type=amdsmi.AmdSmiMemoryType.VRAM)
+ for device_handle in device_handles
+ ]
+ else:
+ device_handles = amdsmi.amdsmi_get_device_handles()
+ vrams = [
+ amdsmi.amdsmi_dev_get_memory_total(device_handle, mem_type=amdsmi.AmdSmiMemoryType.VRAM)
+ for device_handle in device_handles
+ ]
+
+ amdsmi.amdsmi_shut_down()
+
+ else:
+ raise ValueError("No NVIDIA or ROCm GPUs found.")
+
+ return sum(vrams)
+
+
+def get_gpu_device_ids() -> str:
+ if os.environ.get("CUDA_VISIBLE_DEVICES", None) is not None:
+ device_ids = os.environ["CUDA_VISIBLE_DEVICES"]
+ elif os.environ.get("GPU_DEVICE_ORDINAL", None) is not None:
+ device_ids = os.environ["GPU_DEVICE_ORDINAL"]
+ elif os.environ.get("HIP_VISIBLE_DEVICES", None) is not None:
+ device_ids = os.environ["HIP_VISIBLE_DEVICES"]
+ elif os.environ.get("ROCR_VISIBLE_DEVICES", None) is not None:
+ device_ids = os.environ["ROCR_VISIBLE_DEVICES"]
+ elif is_nvidia_system():
+ if not is_pynvml_available():
+ raise ValueError(
+ "The library pynvml is required to run memory benchmark on NVIDIA GPUs, but is not installed. "
+ "Please install the official and NVIDIA maintained PyNVML library through `pip install nvidia-ml-py`."
+ )
+
+ pynvml.nvmlInit()
+ device_ids = list(range(pynvml.nvmlDeviceGetCount()))
+ device_ids = ",".join(str(i) for i in device_ids)
+ pynvml.nvmlShutdown()
+ elif is_rocm_system():
+ if not is_amdsmi_available():
+ raise ValueError(
+ "The library amdsmi is required to run memory benchmark on AMD GPUs, but is not installed. "
+ "Please install the official and AMD maintained amdsmi library from https://github.com/ROCm/amdsmi."
+ )
+
+ amdsmi.amdsmi_init()
+ rocm_version = get_rocm_version()
+
+ if rocm_version >= "5.7":
+ device_ids = list(range(len(amdsmi.amdsmi_get_processor_handles())))
+ else:
+ device_ids = list(range(len(amdsmi.amdsmi_get_device_handles())))
+
+ device_ids = ",".join(str(i) for i in device_ids)
+ amdsmi.amdsmi_shut_down()
+ else:
+ raise ValueError("Couldn't infer GPU device ids.")
+
+ return device_ids
+
+
+## System related stuff
+def get_system_info() -> dict:
+ system_dict = {
+ "cpu": get_cpu(),
+ "cpu_count": os.cpu_count(),
+ "cpu_ram_mb": get_cpu_ram_mb(),
+ "system": platform.system(),
+ "machine": platform.machine(),
+ "platform": platform.platform(),
+ "processor": platform.processor(),
+ "python_version": platform.python_version(),
+ }
+
+ if is_nvidia_system() or is_rocm_system():
+ system_dict["gpu"] = get_gpus()
+ system_dict["gpu_count"] = len(get_gpus())
+ system_dict["gpu_vram_mb"] = get_gpu_vram_mb()
+
+ return system_dict
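
A usage sketch (not part of the patch) of the new system_utils helpers:

    from optimum_benchmark.system_utils import get_system_info, is_nvidia_system, is_rocm_system

    info = get_system_info()  # cpu, cpu_count, cpu_ram_mb, platform, python_version, ...
    print(info["cpu"], info["cpu_ram_mb"])
    if is_nvidia_system() or is_rocm_system():
        # gpu keys are only populated when an NVIDIA or ROCm GPU is detected
        print(info["gpu"], info["gpu_count"], info["gpu_vram_mb"])
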
diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py
index e35baae3..bd7d7999 100644
--- a/optimum_benchmark/task_utils.py
+++ b/optimum_benchmark/task_utils.py
@@ -1,5 +1,5 @@
-import os
import importlib
+import os
from typing import Optional
import huggingface_hub
@@ -16,10 +16,7 @@
"feature-extraction": "AutoModel",
"fill-mask": "AutoModelForMaskedLM",
"image-classification": "AutoModelForImageClassification",
- "image-segmentation": (
- "AutoModelForImageSegmentation",
- "AutoModelForSemanticSegmentation",
- ),
+ "image-segmentation": ("AutoModelForImageSegmentation", "AutoModelForSemanticSegmentation"),
"image-to-image": "AutoModelForImageToImage",
"image-to-text": "AutoModelForVision2Seq",
"mask-generation": "AutoModel",
@@ -64,12 +61,8 @@
"stable-diffusion": "StableDiffusionPipeline",
"stable-diffusion-xl": "StableDiffusionXLImg2ImgPipeline",
}
-_TIMM_TASKS_TO_MODEL_LOADERS = {
- "image-classification": "create_model",
-}
-_LIBRARY_TO_TF_TASKS_TO_MODEL_LOADER_MAP = {
- "transformers": _TRANSFORMERS_TASKS_TO_TF_MODEL_LOADERS,
-}
+_TIMM_TASKS_TO_MODEL_LOADERS = {"image-classification": "create_model"}
+_LIBRARY_TO_TF_TASKS_TO_MODEL_LOADER_MAP = {"transformers": _TRANSFORMERS_TASKS_TO_TF_MODEL_LOADERS}
_LIBRARY_TO_TASKS_TO_MODEL_LOADER_MAP = {
"transformers": _TRANSFORMERS_TASKS_TO_MODEL_LOADERS,
"diffusers": _DIFFUSERS_TASKS_TO_MODEL_LOADERS,
@@ -96,35 +89,15 @@
"zero-shot-classification": "text-classification",
}
_CUSTOM_CLASSES = {
- ("pt", "pix2struct", "image-to-text"): (
- "transformers",
- "Pix2StructForConditionalGeneration",
- ),
- ("pt", "pix2struct", "visual-question-answering"): (
- "transformers",
- "Pix2StructForConditionalGeneration",
- ),
- ("pt", "visual-bert", "question-answering"): (
- "transformers",
- "VisualBertForQuestionAnswering",
- ),
- ("pt", "vision-encoder-decoder", "document-question-answering"): (
- "transformers",
- "VisionEncoderDecoderModel",
- ),
+ ("pt", "pix2struct", "image-to-text"): ("transformers", "Pix2StructForConditionalGeneration"),
+ ("pt", "pix2struct", "visual-question-answering"): ("transformers", "Pix2StructForConditionalGeneration"),
+ ("pt", "visual-bert", "question-answering"): ("transformers", "VisualBertForQuestionAnswering"),
+ ("pt", "vision-encoder-decoder", "document-question-answering"): ("transformers", "VisionEncoderDecoderModel"),
}
-IMAGE_DIFFUSION_TASKS = [
- "stable-diffusion",
- "stable-diffusion-xl",
-]
+IMAGE_DIFFUSION_TASKS = ["stable-diffusion", "stable-diffusion-xl"]
-TEXT_GENERATION_TASKS = [
- "image-to-text",
- "text-generation",
- "text2text-generation",
- "automatic-speech-recognition",
-]
+TEXT_GENERATION_TASKS = ["image-to-text", "text-generation", "text2text-generation", "automatic-speech-recognition"]
def map_from_synonym(task: str) -> str:
@@ -166,10 +139,7 @@ def infer_task_from_model_name_or_path(model_name_or_path: str, revision: Option
else:
pipeline_tag = getattr(model_info, "pipeline_tag", None)
        # conversational is not a supported task per se, just an alias that may map to text-generation or text2text-generation
- if pipeline_tag is not None and pipeline_tag not in [
- "conversational",
- "object-detection",
- ]:
+ if pipeline_tag is not None and pipeline_tag not in ["conversational", "object-detection"]:
inferred_task_name = map_from_synonym(model_info.pipeline_tag)
else:
transformers_info = model_info.transformersInfo
diff --git a/optimum_benchmark/trackers/energy.py b/optimum_benchmark/trackers/energy.py
index 7d3bb7ad..d5335b5d 100644
--- a/optimum_benchmark/trackers/energy.py
+++ b/optimum_benchmark/trackers/energy.py
@@ -1,35 +1,99 @@
import os
-from logging import getLogger
from contextlib import contextmanager
-from typing import Optional, Dict
+from dataclasses import dataclass
+from logging import getLogger
+from typing import List, Literal, Optional
-from ..env_utils import get_cuda_device_ids
-from ..import_utils import is_codecarbon_available
+from ..import_utils import is_codecarbon_available, is_torch_distributed_available
+from ..system_utils import get_gpu_device_ids
-if is_codecarbon_available():
- from codecarbon import EmissionsTracker, OfflineEmissionsTracker
+if is_torch_distributed_available():
+ import torch.distributed
+if is_codecarbon_available():
+ from codecarbon import (
+ EmissionsTracker, # type: ignore
+ OfflineEmissionsTracker,
+ )
LOGGER = getLogger("energy")
+ENERGY_UNIT = "kWh"
+Energy_Unit_Literal = Literal["kWh"]
+Efficiency_Unit_Literal = Literal["samples/kWh", "tokens/kWh", "images/kWh"]
+
+
+@dataclass
+class Energy:
+ unit: Energy_Unit_Literal
+
+ cpu: float
+ ram: float
+ gpu: float
+ total: float
+
+ @staticmethod
+ def aggregate(energies: List["Energy"]) -> "Energy":
+ if len(energies) == 0 or all(energy is None for energy in energies):
+ return None
+ elif any(energy is None for energy in energies):
+ raise ValueError("Some energy measurements are missing")
+
+ cpu = sum(energy.cpu for energy in energies)
+ gpu = sum(energy.gpu for energy in energies)
+ ram = sum(energy.ram for energy in energies)
+ total = sum(energy.total for energy in energies)
+
+ return Energy(cpu=cpu, gpu=gpu, ram=ram, total=total, unit=ENERGY_UNIT)
+
+ def log(self, prefix: str = "forward"):
+ LOGGER.info(f"\t\t+ {prefix} CPU energy: {self.cpu:f} ({self.unit})")
+ LOGGER.info(f"\t\t+ {prefix} GPU energy: {self.gpu:f} ({self.unit})")
+ LOGGER.info(f"\t\t+ {prefix} RAM energy: {self.ram:f} ({self.unit})")
+ LOGGER.info(f"\t\t+ {prefix} total energy: {self.total:f} ({self.unit})")
+
+
+@dataclass
+class Efficiency:
+ unit: Efficiency_Unit_Literal
+
+ value: float
+
+ @staticmethod
+ def aggregate(efficiencies: List["Efficiency"]) -> "Efficiency":
+ if len(efficiencies) == 0:
+ raise ValueError("No efficiency measurements to aggregate")
+ elif any(efficiency is None for efficiency in efficiencies):
+ raise ValueError("Some efficiency measurements are None")
+
+ unit = efficiencies[0].unit
+ value = sum(efficiency.value for efficiency in efficiencies) / len(efficiencies)
+
+ return Efficiency(value=value, unit=unit)
+
+ @staticmethod
+ def from_energy(energy: "Energy", volume: int, unit: str) -> "Efficiency":
+ return Efficiency(value=volume / energy.total if energy.total > 0 else 0, unit=unit)
+
+ def log(self, prefix: str = "forward"):
+ LOGGER.info(f"\t\t+ {prefix} efficiency: {self.value:f} ({self.unit})")
+
class EnergyTracker:
def __init__(self, device: str, device_ids: Optional[str] = None):
self.device = device
-
- self.cpu_energy: float = 0
- self.gpu_energy: float = 0
- self.ram_energy: float = 0
- self.total_energy: float = 0
+ self.device_ids = device_ids
+ self.distributed = is_torch_distributed_available() and torch.distributed.is_initialized()
if self.device == "cuda":
- if device_ids is None:
+ if self.device_ids is None:
LOGGER.warning("\t+ `device=cuda` but `device_ids` not provided. Using all available CUDA devices.")
- self.device_ids = list(map(int, get_cuda_device_ids().split(",")))
- else:
- self.device_ids = list(map(int, device_ids.split(",")))
- else:
- self.device_ids = []
+ self.device_ids = get_gpu_device_ids()
+
+ self.device_ids = list(map(int, self.device_ids.split(",")))
+ LOGGER.info(f"\t+ Tracking GPU energy on devices {self.device_ids}")
+
+ self.reset()
def reset(self):
self.cpu_energy = 0
@@ -72,10 +136,16 @@ def track(self, interval=1, file_prefix="method"):
country_iso_code=os.environ.get("COUNTRY_ISO_CODE", "FRA"),
)
+ if self.distributed:
+ torch.distributed.barrier(device_ids=[torch.cuda.current_device()] if self.device == "cuda" else None)
+
self.emission_tracker.start()
yield
self.emission_tracker.stop()
+ if self.distributed:
+ torch.distributed.barrier(device_ids=[torch.cuda.current_device()] if self.device == "cuda" else None)
+
self.cpu_energy = self.emission_tracker._total_cpu_energy.kWh
self.gpu_energy = self.emission_tracker._total_gpu_energy.kWh
self.ram_energy = self.emission_tracker._total_ram_energy.kWh
@@ -84,10 +154,7 @@ def track(self, interval=1, file_prefix="method"):
def get_elapsed_time(self) -> float:
return self.emission_tracker._last_measured_time - self.emission_tracker._start_time
- def get_energies_dict(self) -> Dict[str, float]:
- return {
- "cpu_energy(kHh)": self.cpu_energy,
- "gpu_energy(kHh)": self.gpu_energy,
- "ram_energy(kHh)": self.ram_energy,
- "total(kHh)": self.total_energy,
- }
+ def get_energy(self) -> Energy:
+ return Energy(
+ unit=ENERGY_UNIT, cpu=self.cpu_energy, gpu=self.gpu_energy, ram=self.ram_energy, total=self.total_energy
+ )
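
For illustration, a minimal way to drive the reworked energy tracker (a sketch, not part of the patch; assumes codecarbon is installed and that tracking happens on CPU, with volume being the number of processed samples):

    from optimum_benchmark.trackers.energy import Efficiency, EnergyTracker

    tracker = EnergyTracker(device="cpu")
    with tracker.track(file_prefix="forward"):
        sum(i * i for i in range(1_000_000))  # workload under measurement

    energy = tracker.get_energy()  # Energy(unit="kWh", cpu=..., gpu=..., ram=..., total=...)
    energy.log(prefix="forward")
    Efficiency.from_energy(energy, volume=1000, unit="samples/kWh").log(prefix="forward")
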
diff --git a/optimum_benchmark/trackers/latency.py b/optimum_benchmark/trackers/latency.py
index 369c2b70..e076875f 100644
--- a/optimum_benchmark/trackers/latency.py
+++ b/optimum_benchmark/trackers/latency.py
@@ -1,91 +1,241 @@
+import time
from contextlib import contextmanager
+from dataclasses import dataclass
from logging import getLogger
-from typing import List
-import time
+from typing import List, Literal, Union
-from ..import_utils import is_torch_distributed_available, is_torch_available
-
-if is_torch_available():
- import torch
+from ..import_utils import is_torch_distributed_available
if is_torch_distributed_available():
import torch.distributed
+import torch
+from transformers import LogitsProcessor, TrainerCallback
+
LOGGER = getLogger("latency")
+LATENCY_UNIT = "s"
+Latency_Unit_Literal = Literal["s"]
+Throughput_Unit_Literal = Literal["samples/s", "tokens/s", "images/s", "steps/s"]
+
+
+@dataclass
+class Latency:
+ unit: Latency_Unit_Literal
+
+ mean: float
+ stdev: float
+ values: List[float]
+
+    def __getitem__(self, index: Union[int, slice]) -> "Latency":
+ if isinstance(index, slice):
+ return Latency.from_values(values=self.values[index], unit=self.unit)
+ else:
+ return Latency.from_values(values=[self.values[index]], unit=self.unit)
+
+ def __sub__(self, scalar: float) -> "Latency":
+ if not isinstance(scalar, (int, float)):
+ raise ValueError(f"Cannot subtract non-scalar value from latency: {scalar}")
+
+ latencies = [lat - scalar for lat in self.values]
+ return Latency.from_values(values=latencies, unit=self.unit)
+
+ @staticmethod
+ def aggregate(latencies: List["Latency"]) -> "Latency":
+ if len(latencies) == 0 or all(latency is None for latency in latencies):
+ return None
+ elif any(latency is None for latency in latencies):
+ raise ValueError("Some latency measurements are missing")
+
+ unit = latencies[0].unit
+ values = sum((lat.values for lat in latencies), [])
+ return Latency.from_values(values=values, unit=unit)
+
+ @staticmethod
+ def from_values(values: List[float], unit: str) -> "Latency":
+ mean = sum(values) / len(values) if len(values) > 0 else 0
+ stdev = (sum((val - mean) ** 2 for val in values) / len(values)) ** 0.5 if len(values) > 1 else 0
+ return Latency(mean=mean, stdev=stdev, values=values, unit=unit)
+
+ def log(self, prefix: str = "forward"):
+ LOGGER.info(f"\t\t+ {prefix} latency: {self.mean:f} ยฑ 2 x {self.stdev:f} ({self.unit})")
+
+
+@dataclass
+class Throughput:
+ unit: Throughput_Unit_Literal
+
+ value: float
+
+ @staticmethod
+ def aggregate(throughputs: List["Throughput"]) -> "Throughput":
+ if len(throughputs) == 0:
+ raise ValueError("No throughput measurements to aggregate")
+ elif any(throughput is None for throughput in throughputs):
+ raise ValueError("Some throughput measurements are missing")
+
+ unit = throughputs[0].unit
+ value = sum(throughput.value for throughput in throughputs)
+
+ return Throughput(value=value, unit=unit)
+
+ @staticmethod
+ def from_latency(latency: Latency, volume: int, unit: str) -> "Throughput":
+ value = volume / latency.mean if latency.mean > 0 else 0
+ return Throughput(value=value, unit=unit)
+
+ def log(self, prefix: str = "forward"):
+ LOGGER.info(f"\t\t+ {prefix} throughput: {self.value:f} {self.unit}")
+
class LatencyTracker:
def __init__(self, device: str, backend: str):
self.device = device
self.backend = backend
+ self.distributed = is_torch_distributed_available() and torch.distributed.is_initialized()
- self.latencies: List[float] = []
-
- # this is not in track, because this tracker is used repeatedly
- if is_torch_distributed_available() and torch.distributed.is_initialized():
- LOGGER.info("\t+ Tracking Pytorch Distributed latency")
- elif self.device == "cuda" and self.backend == "pytorch":
+ if self.backend == "pytorch" and self.device == "cuda":
LOGGER.info("\t+ Tracking Pytorch CUDA latency")
else:
LOGGER.info("\t+ Tracking CPU latency")
+ self.reset()
+
def reset(self):
- self.latencies = []
+ self.start_events: List[Union[float, torch.cuda.Event]] = []
+ self.end_events: List[Union[float, torch.cuda.Event]] = []
+ self.start_time: float = time.perf_counter()
@contextmanager
def track(self):
- if is_torch_distributed_available() and torch.distributed.is_initialized():
- yield from self._pytorch_distributed_latency()
- elif self.backend == "pytorch" and self.device == "cuda":
+ if self.distributed:
+ torch.distributed.barrier(device_ids=[torch.cuda.current_device()] if self.device == "cuda" else None)
+
+ if self.backend == "pytorch" and self.device == "cuda":
yield from self._pytorch_cuda_latency()
else:
yield from self._cpu_latency()
- def _pytorch_distributed_latency(self):
- torch.distributed.barrier() # synchronize before workload
- start = time.perf_counter_ns()
+ if self.distributed:
+ torch.distributed.barrier(device_ids=[torch.cuda.current_device()] if self.device == "cuda" else None)
+
+ def _pytorch_cuda_latency(self):
+ start = torch.cuda.Event(enable_timing=True)
+ start.record()
+ self.start_events.append(start)
+
yield
- torch.distributed.barrier() # synchronize after workload
- end = time.perf_counter_ns()
- latency = (end - start) / 1e9
- self.latencies.append(latency)
+ end = torch.cuda.Event(enable_timing=True)
+ end.record()
+ self.end_events.append(end)
- LOGGER.debug(f"\t+ Tracked Pytorch distributed latency: {latency:.2e}s")
+ def _cpu_latency(self):
+ start = time.perf_counter()
+ self.start_events.append(start)
- def _pytorch_cuda_latency(self):
- # Note: torch.cuda.Event is not used here,
- # there's actually no specific need to use cuda events if you're synchronizing
- # it's rather a feature that can be used to measure kernel latency without synchronizing,
- # allowing us to measure the time it takes to perform an operation without necessarily stalling the GPU.
- # An interesting use case is with cuda graphs where synchronization makes us shoot the optimization in the foot.
- # details: https://developer.nvidia.com/blog/how-implement-performance-metrics-cuda-cc/
- torch.cuda.synchronize() # synchronize before workload
- start = time.perf_counter_ns()
yield
- torch.cuda.synchronize() # synchronize after workload
- end = time.perf_counter_ns()
- latency = (end - start) / 1e9
- self.latencies.append(latency)
+ end = time.perf_counter()
+ self.end_events.append(end)
- LOGGER.debug(f"\t+ Tracked Pytorch CUDA latency: {latency:.2e}s")
+ def get_elapsed_time(self) -> float:
+        # measured on the CPU so we don't have to synchronize all recorded CUDA events
+ return time.perf_counter() - self.start_time
- def _cpu_latency(self):
- start = time.perf_counter_ns()
- yield
- end = time.perf_counter_ns()
+ def get_latency(self) -> Latency:
+ if self.backend == "pytorch" and self.device == "cuda":
+ # synchronize the last event to make sure it has been recorded
+ self.start_events[-1].synchronize()
+ self.end_events[-1].synchronize()
+
+ latencies_list = [
+ self.start_events[i].elapsed_time(self.end_events[i]) / 1e3 for i in range(len(self.start_events))
+ ]
+ else:
+ latencies_list = [(self.end_events[i] - self.start_events[i]) for i in range(len(self.start_events))]
+
+ return Latency.from_values(latencies_list, unit=LATENCY_UNIT)
+
+ def get_throughput(self, volume: int, unit: str) -> Throughput:
+ return Throughput.from_latency(self.get_latency(), volume, unit)
+
+
+class LatencyTrainerCallback(TrainerCallback):
+ def __init__(self, device: str, backend: str) -> None:
+ self.device = device
+ self.backend = backend
+
+ self.reset()
+
+ def reset(self):
+ self.events: List[Union[float, torch.cuda.Event]] = []
- latency = (end - start) / 1e9
- self.latencies.append(latency)
+ def on_step_begin(self, *args, **kwargs):
+ if self.device == "cuda" and self.backend == "pytorch":
+ event = torch.cuda.Event(enable_timing=True)
+ event.record()
+ self.events.append(event)
+ else:
+ self.events.append(time.perf_counter())
+
+ def on_train_end(self, *args, **kwargs):
+ # one last record to measure the time of the last step
+ if self.device == "cuda" and self.backend == "pytorch":
+ event = torch.cuda.Event(enable_timing=True)
+ event.record()
+ self.events.append(event)
+ else:
+ self.events.append(time.perf_counter())
- LOGGER.debug(f"\t+ Tracked CPU latency: {latency:.2e}s")
+ def get_latency(self) -> Latency:
+ if self.device == "cuda" and self.backend == "pytorch":
+ # synchronize the device to make sure all events have been recorded
+ torch.cuda.synchronize()
+ latencies_list = [self.events[i - 1].elapsed_time(self.events[i]) / 1e3 for i in range(1, len(self.events))]
+ else:
+ latencies_list = [(self.events[i] - self.events[i - 1]) for i in range(1, len(self.events))]
- def get_total_count(self):
- return len(self.latencies)
+ return Latency.from_values(latencies_list, unit=LATENCY_UNIT)
+
+ def get_throughput(self, volume: int, unit: str) -> Throughput:
+ return Throughput.from_latency(self.get_latency(), volume, unit)
+
+
+class LatencyLogitsProcessor(LogitsProcessor):
+ def __init__(self, device: str, backend: str):
+ self.device = device
+ self.backend = backend
+
+ self.reset()
+
+ def reset(self):
+ if self.device == "cuda" and self.backend == "pytorch":
+ event = torch.cuda.Event(enable_timing=True)
+ event.record()
+ self.events = [event]
+ else:
+ self.events = [time.perf_counter()]
+
+ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
+ if self.device == "cuda" and self.backend == "pytorch":
+ event = torch.cuda.Event(enable_timing=True)
+ event.record()
+ self.events.append(event)
+ else:
+ self.events.append(time.perf_counter())
+
+ return scores
+
+ def get_latency(self) -> Latency:
+ if self.device == "cuda" and self.backend == "pytorch":
+ # synchronize the device to make sure all events have been recorded
+ torch.cuda.synchronize()
+ latencies_list = [self.events[i - 1].elapsed_time(self.events[i]) / 1e3 for i in range(1, len(self.events))]
+ else:
+ latencies_list = [(self.events[i] - self.events[i - 1]) for i in range(1, len(self.events))]
- def get_total_latency(self):
- return sum(self.latencies)
+ return Latency.from_values(latencies_list, unit=LATENCY_UNIT)
- def get_latencies_list(self) -> List[float]:
- return self.latencies
+ def get_throughput(self, volume: int, unit: str) -> Throughput:
+ return Throughput.from_latency(self.get_latency(), volume, unit)
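
A sketch of how the reworked latency tracker is consumed (not part of the patch; "none" is just a placeholder for any non-pytorch backend name, and torch/transformers must be importable since the module now imports them at module level): track() wraps each iteration, and the summary objects are built afterwards:

    from optimum_benchmark.trackers.latency import LatencyTracker

    tracker = LatencyTracker(device="cpu", backend="none")
    for _ in range(10):
        with tracker.track():
            sum(i * i for i in range(100_000))  # measured workload

    latency = tracker.get_latency()  # Latency(mean=..., stdev=..., values=[...], unit="s")
    latency.log(prefix="forward")
    tracker.get_throughput(volume=10, unit="samples/s").log(prefix="forward")
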
diff --git a/optimum_benchmark/trackers/memory.py b/optimum_benchmark/trackers/memory.py
index 816f1d5a..017c21fe 100644
--- a/optimum_benchmark/trackers/memory.py
+++ b/optimum_benchmark/trackers/memory.py
@@ -1,86 +1,110 @@
import os
-from logging import getLogger
from contextlib import contextmanager
-from typing import List, Optional, Dict
+from dataclasses import dataclass
+from logging import getLogger
from multiprocessing import Pipe, Process
from multiprocessing.connection import Connection
+from typing import List, Literal, Optional
-from ..env_utils import bytes_to_mega_bytes, get_cuda_device_ids, is_nvidia_system, is_rocm_system
-from ..import_utils import is_py3nvml_available, is_pyrsmi_available, is_torch_available
+from ..import_utils import is_amdsmi_available, is_pynvml_available, is_torch_available, is_torch_distributed_available
+from ..system_utils import get_gpu_device_ids, get_rocm_version, is_nvidia_system, is_rocm_system
-if is_nvidia_system():
- if is_py3nvml_available():
- import py3nvml.py3nvml as nvml
- else:
- raise ValueError(
- "The library py3nvml is required to run memory benchmark on NVIDIA GPUs, but is not installed. "
- "Please install it through `pip install py3nvml`."
- )
+if is_torch_distributed_available():
+ import torch.distributed
-if is_rocm_system():
- if is_pyrsmi_available():
- from pyrsmi import rocml
- else:
- raise ValueError(
- "The library pyrsmi is required to run memory benchmark on AMD GPUs, but is not installed. "
- "Please install it through `pip install pyrsmi@git+https://github.com/RadeonOpenCompute/pyrsmi.git."
- )
+if is_nvidia_system() and is_pynvml_available():
+ import pynvml
+
+if is_rocm_system() and is_amdsmi_available():
+ import amdsmi # type: ignore
if is_torch_available():
import torch
import psutil
-
LOGGER = getLogger("memory")
+MEMORY_UNIT = "MB"
+Memory_Unit_Literal = Literal["MB"]
-class MemoryTracker:
- """
- Memory tracker to measure max memory usage of CPU or GPU devices.
- Args:
- device (str): Device to track memory usage. Can be either "cuda" or any other device.
- backend (str): Backend to track memory usage. Can be either "pytorch" or any other backend.
- device_ids (List[int], optional): List of device IDs to track memory usage. Defaults to None.
- """
+@dataclass
+class Memory:
+ unit: Memory_Unit_Literal
+
+ max_ram: float
+ max_vram: Optional[float] = None
+ max_reserved: Optional[float] = None
+ max_allocated: Optional[float] = None
+
+ @staticmethod
+ def aggregate(memories: List["Memory"]) -> "Memory":
+ if len(memories) == 0:
+ raise ValueError("No memory measurements to aggregate")
+ elif any(memory is None for memory in memories):
+ raise ValueError("Some memory measurements are missing")
+ unit = memories[0].unit
+ max_ram = sum(memory.max_ram for memory in memories)
+ max_vram = sum(memory.max_vram for memory in memories) if memories[0].max_vram is not None else None
+ max_reserved = sum(memory.max_reserved for memory in memories) if memories[0].max_reserved is not None else None
+ max_allocated = (
+ sum(memory.max_allocated for memory in memories) if memories[0].max_allocated is not None else None
+ )
+ return Memory(
+ unit=unit, max_ram=max_ram, max_vram=max_vram, max_reserved=max_reserved, max_allocated=max_allocated
+ )
+
+ def log(self, prefix: str = "forward"):
+ LOGGER.info(f"\t\t+ {prefix} max RAM memory: {self.max_ram:f} ({self.unit})")
+ if self.max_vram is not None:
+ LOGGER.info(f"\t\t+ {prefix} max VRAM memory: {self.max_vram:f} ({self.unit})")
+ if self.max_reserved is not None:
+ LOGGER.info(f"\t\t+ {prefix} max reserved memory: {self.max_reserved:f} ({self.unit})")
+ if self.max_allocated is not None:
+ LOGGER.info(f"\t\t+ {prefix} max allocated memory: {self.max_allocated:f} ({self.unit})")
+
+
+class MemoryTracker:
def __init__(self, device: str, backend: str, device_ids: Optional[str] = None):
self.device = device
self.backend = backend
+ self.device_ids = device_ids
+ self.distributed = is_torch_distributed_available() and torch.distributed.is_initialized()
- self.max_memory_used = 0
- self.max_memory_reserved = 0
- self.max_memory_allocated = 0
+ LOGGER.info("\t+ Tracking RAM memory")
if self.device == "cuda":
- if device_ids is None:
+ if self.device_ids is None:
LOGGER.warning("\t+ `device=cuda` but `device_ids` not provided. Using all available CUDA devices.")
- self.device_ids = list(map(int, get_cuda_device_ids().split(",")))
- else:
- self.device_ids = list(map(int, device_ids.split(",")))
+ self.device_ids = get_gpu_device_ids()
+ self.device_ids = list(map(int, self.device_ids.split(",")))
LOGGER.info(f"\t+ Tracking VRAM memory of CUDA devices: {self.device_ids}")
if self.backend == "pytorch":
- self.pytorch_device_ids = list(range(torch.cuda.device_count()))
- LOGGER.info(f"\t+ Tracking Pytorch memory of Pytorch CUDA devices: {self.pytorch_device_ids}")
-
- if len(self.device_ids) != len(self.pytorch_device_ids):
+ num_pytorch_devices = torch.cuda.device_count()
+ if len(self.device_ids) != num_pytorch_devices:
raise ValueError(
"The number of CUDA devices and Pytorch CUDA devices must be the same. "
- f"Got {len(self.device_ids)} and {len(self.pytorch_device_ids)} respectively."
+ f"Got {len(self.device_ids)} and {num_pytorch_devices} respectively."
)
- else:
- LOGGER.info("\t+ Tracking RAM memory")
+ LOGGER.info(f"\t+ Tracking Allocated/Reserved memory of {num_pytorch_devices} Pytorch CUDA devices")
+
+ self.reset()
def reset(self):
- self.max_memory_used = 0
- self.max_memory_reserved = 0
- self.max_memory_allocated = 0
+ self.max_ram_memory = 0
+ self.max_vram_memory = 0
+ self.max_reserved_memory = 0
+ self.max_allocated_memory = 0
@contextmanager
def track(self):
+ if self.distributed:
+ torch.distributed.barrier(device_ids=[torch.cuda.current_device()] if self.device == "cuda" else None)
+
if self.device == "cuda" and self.backend == "pytorch":
yield from self._cuda_pytorch_memory()
elif self.device == "cuda":
@@ -88,122 +112,202 @@ def track(self):
else:
yield from self._cpu_memory()
+ if self.distributed:
+ torch.distributed.barrier(device_ids=[torch.cuda.current_device()] if self.device == "cuda" else None)
+
def _cuda_pytorch_memory(self):
torch.cuda.empty_cache()
- for pytorch_device_index in self.pytorch_device_ids:
+
+ for device in range(torch.cuda.device_count()):
try:
- torch.cuda.reset_peak_memory_stats(device=pytorch_device_index)
+ torch.cuda.reset_peak_memory_stats(device=device)
except Exception as e:
- LOGGER.warning(f"\t+ Could not reset max memory stats for device {pytorch_device_index}: {e}")
+ LOGGER.warning(f"\t\t+ Could not reset max memory stats for device {device}: {e}")
yield from self._cuda_memory()
- for pytorch_device_index in self.pytorch_device_ids:
- self.max_memory_reserved += torch.cuda.max_memory_reserved(device=pytorch_device_index)
- self.max_memory_allocated += torch.cuda.max_memory_allocated(device=pytorch_device_index)
+ self.max_allocated_memory = sum(
+ torch.cuda.max_memory_allocated(device=device) / 1e6 for device in range(torch.cuda.device_count())
+ )
+ self.max_reserved_memory = sum(
+ torch.cuda.max_memory_reserved(device=device) / 1e6 for device in range(torch.cuda.device_count())
+ )
- LOGGER.debug(f"\t+ Pytorch max memory reserved: {self.get_max_memory_reserved_mb()} MB")
- LOGGER.debug(f"\t+ Pytorch max memory allocated: {self.get_max_memory_allocated_mb()} MB")
+ torch.cuda.empty_cache()
- def _cuda_memory(self, interval: float = 0.001):
+ def _cuda_memory(self):
child_connection, parent_connection = Pipe()
memory_process = Process(
- target=monitor_gpu_max_vram_memory,
- args=(self.device_ids, child_connection, interval),
- daemon=True,
+ target=monitor_gpu_vram_memory, args=(os.getpid(), self.device_ids, child_connection), daemon=True
)
memory_process.start()
parent_connection.recv() # wait for memory process to be ready
- yield
+ yield from self._cpu_memory()
parent_connection.send(True)
- self.max_memory_used = parent_connection.recv()
- LOGGER.debug(f"\t+ Max memory (VRAM) used: {self.get_max_memory_used_mb()} MB")
+ self.max_vram_memory = parent_connection.recv()
- def _cpu_memory(self, interval: float = 0.001):
+ def _cpu_memory(self):
child_connection, parent_connection = Pipe()
- memory_process = Process(
- target=monitor_cpu_max_ram_memory,
- args=(os.getpid(), child_connection, interval),
- daemon=True,
- )
+ memory_process = Process(target=monitor_cpu_ram_memory, args=(os.getpid(), child_connection), daemon=True)
memory_process.start()
parent_connection.recv() # wait for memory process to be ready
yield
parent_connection.send(True)
- self.max_memory_used = parent_connection.recv()
- LOGGER.debug(f"\t+ Max memory (RAM) used: {self.get_max_memory_used_mb()} MB")
-
- def get_max_memory_used_mb(self) -> int:
- return bytes_to_mega_bytes(self.max_memory_used)
-
- def get_max_memory_allocated_mb(self) -> int:
- return bytes_to_mega_bytes(self.max_memory_allocated)
-
- def get_max_memory_reserved_mb(self) -> int:
- return bytes_to_mega_bytes(self.max_memory_reserved)
+ self.max_ram_memory = parent_connection.recv()
- def get_memories_dict(self) -> Dict[str, int]:
+ def get_max_memory(self):
if self.device == "cuda" and self.backend == "pytorch":
- return {
- "max_vram_used(MB)": self.get_max_memory_used_mb(),
- "max_memory_reserved(MB)": self.get_max_memory_reserved_mb(),
- "max_memory_allocated(MB)": self.get_max_memory_allocated_mb(),
- }
+ return Memory(
+ unit=MEMORY_UNIT,
+ max_ram=self.max_ram_memory,
+ max_vram=self.max_vram_memory,
+ max_reserved=self.max_reserved_memory,
+ max_allocated=self.max_allocated_memory,
+ )
elif self.device == "cuda":
- return {"max_vram_used(MB)": self.get_max_memory_used_mb()}
+ return Memory(unit=MEMORY_UNIT, max_ram=self.max_ram_memory, max_vram=self.max_vram_memory)
else:
- return {"max_ram_used(MB)": self.get_max_memory_used_mb()}
+ return Memory(unit=MEMORY_UNIT, max_ram=self.max_ram_memory)
-def monitor_cpu_max_ram_memory(process_id: int, connection: Connection, interval: float):
+def monitor_cpu_ram_memory(process_id: int, connection: Connection, interval: float = 0.001):
+ stop = False
+ max_memory = 0
process = psutil.Process(process_id)
- max_memory_usage = 0
connection.send(0)
- stop = False
while not stop:
meminfo_attr = "memory_info" if hasattr(process, "memory_info") else "get_memory_info"
- current_memory_usage = getattr(process, meminfo_attr)()[0]
- max_memory_usage = max(max_memory_usage, current_memory_usage)
+ current_used_memory = getattr(process, meminfo_attr)()[0]
+ max_memory = max(max_memory, current_used_memory)
stop = connection.poll(interval)
- connection.send(max_memory_usage)
+ connection.send(max_memory / 1e6) # convert to MB
connection.close()
-def monitor_gpu_max_vram_memory(device_ids: List[int], connection: Connection, interval: float):
- if is_nvidia_system() and is_py3nvml_available():
- nvml.nvmlInit()
- handles = [nvml.nvmlDeviceGetHandleByIndex(device_id) for device_id in device_ids]
- max_memory_usage = 0
- connection.send(0)
- stop = False
-
- while not stop:
- current_memory_usage = sum(nvml.nvmlDeviceGetMemoryInfo(handle).used for handle in handles)
- max_memory_usage = max(max_memory_usage, current_memory_usage)
- stop = connection.poll(interval)
+def monitor_gpu_vram_memory(process_id: int, device_ids: List[int], connection: Connection, interval: float = 0.01):
+ stop = False
+ max_memory = 0
+ connection.send(0)
- connection.send(max_memory_usage)
- nvml.nvmlShutdown()
- connection.close()
- elif is_rocm_system() and is_pyrsmi_available():
- rocml.smi_initialize()
- max_memory_usage = 0
- connection.send(0)
- stop = False
+ if is_nvidia_system():
+ if not is_pynvml_available():
+ raise ValueError(
+ "The library pynvml is required to run memory benchmark on NVIDIA GPUs, but is not installed. "
+ "Please install the official and NVIDIA maintained PyNVML library through `pip install nvidia-ml-py`."
+ )
+ pynvml.nvmlInit()
+ devices_handles = [pynvml.nvmlDeviceGetHandleByIndex(device_id) for device_id in device_ids]
while not stop:
- current_memory_usage = sum(rocml.smi_get_device_memory_used(device_id) for device_id in device_ids)
- max_memory_usage = max(max_memory_usage, current_memory_usage)
+ current_used_memory = 0
+ for device_id, device_handle in zip(device_ids, devices_handles):
+ try:
+ device_processes = pynvml.nvmlDeviceGetComputeRunningProcesses(device_handle)
+ except Exception as e:
+ LOGGER.warning(f"\t\t+ Could not get process list for device {device_id}: {e}")
+ continue
+ for device_process in device_processes:
+ if device_process.pid == process_id:
+ current_used_memory += device_process.usedGpuMemory
+ else:
+ try:
+ cpu_process = psutil.Process(device_process.pid)
+ except Exception as e:
+ LOGGER.warning(f"\t\t+ Could not get process info for process {device_process.pid}: {e}")
+ continue
+ if cpu_process.parent() is not None and cpu_process.parent().pid == process_id:
+ current_used_memory += device_process.usedGpuMemory
+
+ max_memory = max(max_memory, current_used_memory)
stop = connection.poll(interval)
- connection.send(max_memory_usage)
- rocml.smi_shutdown()
- connection.close()
+ pynvml.nvmlShutdown()
+
+ elif is_rocm_system():
+ if not is_amdsmi_available():
+ raise ValueError(
+ "The library amdsmi is required to run memory benchmark on AMD GPUs, but is not installed. "
+ "Please install the official and AMD maintained amdsmi library from https://github.com/ROCm/amdsmi."
+ )
+ amdsmi.amdsmi_init()
+ rocm_version = get_rocm_version()
+
+ if rocm_version >= "5.7":
+ devices_handles = amdsmi.amdsmi_get_processor_handles()
+ while not stop:
+ current_used_memory = 0
+ for device_id in device_ids:
+ device_handle = devices_handles[device_id]
+ try:
+ processes_handles = amdsmi.amdsmi_get_gpu_process_list(device_handle)
+ except Exception as e:
+ LOGGER.warning(f"\t\t+ Could not get process list for device {device_id}: {e}")
+ continue
+ for process_handle in processes_handles:
+ try:
+ gpu_process_info = amdsmi.amdsmi_get_gpu_process_info(device_handle, process_handle)
+ except Exception as e:
+ LOGGER.warning(f"\t\t+ Could not get process info for process {process_handle}: {e}")
+ continue
+ # only memory usage of the monitored process and its children is tracked
+ if gpu_process_info["pid"] == process_id:
+ current_used_memory += gpu_process_info["memory_usage"]["vram_mem"]
+ else:
+ try:
+ cpu_process_info = psutil.Process(gpu_process_info["pid"])
+ except Exception as e:
+ LOGGER.warning(
+ f"\t\t+ Could not get process info for process {gpu_process_info['pid']}: {e}"
+ )
+ continue
+ if cpu_process_info.parent() is not None and cpu_process_info.ppid() == process_id:
+ current_used_memory += gpu_process_info["memory_usage"]["vram_mem"]
+
+ max_memory = max(max_memory, current_used_memory)
+ stop = connection.poll(interval)
+ else:
+ devices_handles = amdsmi.amdsmi_get_device_handles()
+ while not stop:
+ current_used_memory = 0
+ for device_id in device_ids:
+ device_handle = devices_handles[device_id]
+ try:
+ processes_handles = amdsmi.amdsmi_get_process_list(device_handle)
+ except Exception as e:
+ LOGGER.warning(f"\t\t+ Could not get process list for device {device_id}: {e}")
+ continue
+ for process_handle in processes_handles:
+ try:
+ gpu_process_info = amdsmi.amdsmi_get_process_info(device_handle, process_handle)
+ except Exception as e:
+ LOGGER.warning(f"\t\t+ Could not get process info for process {process_handle}: {e}")
+ continue
+ # only memory usage of the monitored process and its children is tracked
+ if gpu_process_info["pid"] == process_id:
+ current_used_memory += gpu_process_info["memory_usage"]["vram_mem"]
+ else:
+ try:
+ cpu_process_info = psutil.Process(gpu_process_info["pid"])
+ except Exception as e:
+ LOGGER.warning(
+ f"\t\t+ Could not get process info for process {gpu_process_info['pid']}: {e}"
+ )
+ continue
+ if cpu_process_info.parent() is not None and cpu_process_info.ppid() == process_id:
+ current_used_memory += gpu_process_info["memory_usage"]["vram_mem"]
+
+ max_memory = max(max_memory, current_used_memory)
+ stop = connection.poll(interval)
+
+ amdsmi.amdsmi_shut_down()
else:
raise ValueError("Only NVIDIA and AMD ROCm GPUs are supported for CUDA memory tracking.")
+
+ connection.send(max_memory / 1e6) # convert to MB
+ connection.close()
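
And a sketch of the memory tracker on CPU (not part of the patch; "none" is again a placeholder backend name): the monitor runs in a separate process and reports the peak resident memory in MB:

    from optimum_benchmark.trackers.memory import MemoryTracker

    tracker = MemoryTracker(device="cpu", backend="none")
    with tracker.track():
        payload = [bytearray(1024) for _ in range(100_000)]  # allocate some memory

    memory = tracker.get_max_memory()  # Memory(unit="MB", max_ram=...)
    memory.log(prefix="forward")
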
diff --git a/pyproject.toml b/pyproject.toml
index e9ce4301..58e5b284 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,5 +1,18 @@
+# [tool.isort]
+# profile = "ruff"
+# lines_after_imports = 2
+# known_first_party = "optimum_benchmark"
+
[tool.ruff]
line-length = 120
+ignore = ["C901", "E501", "E741", "W605"]
+select = ["C", "E", "F", "I", "W", "I001"]
+
+[tool.ruff.format]
+line-ending = "auto"
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
[tool.pytest.ini_options]
log_cli = true
diff --git a/setup.py b/setup.py
index 40504fd3..f993adc4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,6 @@
import os
import subprocess
+
from setuptools import find_packages, setup
MIN_OPTIMUM_VERSION = "1.16.0"
@@ -12,13 +13,10 @@
"hydra_colorlog",
"hydra-core",
"omegaconf",
- # Other
+ # CPU Memory
"psutil",
- "pandas",
# Reporting
- "rich",
- "tabulate",
- "matplotlib",
+ "pandas",
"flatten_dict",
]
@@ -28,20 +26,21 @@
USE_ROCM = os.environ.get("USE_ROCM", None) == "1"
if USE_CUDA:
- INSTALL_REQUIRES.append("py3nvml")
+ INSTALL_REQUIRES.append("nvidia-ml-py")
else:
try:
subprocess.run(["nvidia-smi"], stdout=subprocess.DEVNULL)
- INSTALL_REQUIRES.append("py3nvml")
+ INSTALL_REQUIRES.append("nvidia-ml-py")
except FileNotFoundError:
pass
+# amdsmi is not available on PyPI, so this requirement only serves as a check that it is already installed
if USE_ROCM:
- INSTALL_REQUIRES.append("pyrsmi@git+https://github.com/RadeonOpenCompute/pyrsmi.git")
+ INSTALL_REQUIRES.append("amdsmi")
else:
try:
- subprocess.run(["nvidia-smi"], stdout=subprocess.DEVNULL)
- INSTALL_REQUIRES.append("pyrsmi@git+https://github.com/RadeonOpenCompute/pyrsmi.git")
+ subprocess.run(["rocm-smi"], stdout=subprocess.DEVNULL)
+ INSTALL_REQUIRES.append("amdsmi")
except FileNotFoundError:
pass
@@ -54,11 +53,7 @@
"onnxruntime": [f"optimum[onnxruntime]>={MIN_OPTIMUM_VERSION}"],
"neural-compressor": [f"optimum[neural-compressor]>={MIN_OPTIMUM_VERSION}"],
"onnxruntime-gpu": [f"optimum[onnxruntime-gpu]>={MIN_OPTIMUM_VERSION}"],
- "torch-ort": [
- f"optimum>={MIN_OPTIMUM_VERSION}",
- "onnxruntime-training",
- "torch-ort",
- ],
+ "torch-ort": [f"optimum>={MIN_OPTIMUM_VERSION}", "onnxruntime-training", "torch-ort"],
# docker-based backends
"text-generation-inference": ["docker"],
# specific settings
@@ -75,6 +70,6 @@
install_requires=INSTALL_REQUIRES,
extras_require=EXTRAS_REQUIRE,
packages=find_packages(),
- version="0.0.2",
+ version="0.1.0",
entry_points={"console_scripts": ["optimum-benchmark=optimum_benchmark.cli:benchmark_cli"]},
)
diff --git a/tests/configs/_base_.yaml b/tests/configs/_base_.yaml
index d983b841..27acb325 100644
--- a/tests/configs/_base_.yaml
+++ b/tests/configs/_base_.yaml
@@ -2,8 +2,8 @@ defaults:
- launcher: process # isolated process launcher
- experiment # inheriting experiment schema
- _self_ # for hydra 1.1 compatibility
- - override hydra/hydra_logging: colorlog # colorful logging
- - override hydra/job_logging: colorlog # colorful logging
+ - override hydra/hydra_logging: colorlog
+ - override hydra/job_logging: colorlog
- override hydra/launcher: joblib # for parallelization
experiment_name: ${device}_${benchmark.name}_${backend.name}_${task}
@@ -20,13 +20,12 @@ hydra:
# change working directory to the run directory
chdir: true
env_set:
- # set environment variable OVERRIDE_BENCHMARKS to 1
- # to not skip benchmarks that have been run before
+      # don't skip benchmarks whose results already exist
OVERRIDE_BENCHMARKS: 1
# we are using joblib launcher to parallelize testing since
- # we're having ccorrect benchmarks is not important while testing
+ # having correct benchmark values is not important while testing
# to force sequential execution, uncomment the following three lines
# launcher:
- # n_jobs: 1 # for debugging
- # batch_size: 1 # for debugging
+ # n_jobs: -1 # 1 for debugging
+ # batch_size: auto # 1 for debugging
diff --git a/tests/configs/_bert_sweep_.yaml b/tests/configs/_bert_sweep_.yaml
index e6a6c4fc..c4986d0d 100644
--- a/tests/configs/_bert_sweep_.yaml
+++ b/tests/configs/_bert_sweep_.yaml
@@ -1,6 +1,5 @@
hydra:
sweeper:
params:
- backend.no_weights: false,true
+ backend.model: hf-internal-testing/tiny-random-bert
backend.task: fill-mask,text-classification,token-classification,question-answering
- backend.model: hf-internal-testing/tiny-random-bert,hf-internal-testing/tiny-random-roberta
diff --git a/tests/configs/_lm_naive_mp_.yaml b/tests/configs/_gpt_naive_mp_.yaml
similarity index 100%
rename from tests/configs/_lm_naive_mp_.yaml
rename to tests/configs/_gpt_naive_mp_.yaml
index 2ac16fb8..cf2adfd3 100644
--- a/tests/configs/_lm_naive_mp_.yaml
+++ b/tests/configs/_gpt_naive_mp_.yaml
@@ -1,6 +1,6 @@
backend:
- device_ids: 0,1
- device_map: auto
+ model: gpt2
task: text-generation
library: transformers
- model: gpt2
+ device_ids: 0,1
+ device_map: auto
diff --git a/tests/configs/_lm_peft_.yaml b/tests/configs/_gpt_peft_.yaml
similarity index 100%
rename from tests/configs/_lm_peft_.yaml
rename to tests/configs/_gpt_peft_.yaml
diff --git a/tests/configs/_lm_sweep_.yaml b/tests/configs/_gpt_sweep_.yaml
similarity index 81%
rename from tests/configs/_lm_sweep_.yaml
rename to tests/configs/_gpt_sweep_.yaml
index 763d7120..1ff5e2c7 100644
--- a/tests/configs/_lm_sweep_.yaml
+++ b/tests/configs/_gpt_sweep_.yaml
@@ -2,5 +2,4 @@ hydra:
sweeper:
params:
backend.task: text-generation
- backend.no_weights: false,true
backend.model: hf-internal-testing/tiny-random-gpt2,IlyasMoutawwakil/tiny-random-llama
diff --git a/tests/configs/cpu_inference_neural_compressor_lm_sweep.yaml b/tests/configs/cpu_inference_neural_compressor_gpt_sweep.yaml
similarity index 70%
rename from tests/configs/cpu_inference_neural_compressor_lm_sweep.yaml
rename to tests/configs/cpu_inference_neural_compressor_gpt_sweep.yaml
index 21fb30d9..bf2f9d15 100644
--- a/tests/configs/cpu_inference_neural_compressor_lm_sweep.yaml
+++ b/tests/configs/cpu_inference_neural_compressor_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- - _lm_sweep_ # inherits from gpt_sweep config
+ - _gpt_sweep_ # inherits from gpt_sweep config
- _cpu_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cpu_inference_neural_compressor_lm_sweep
+experiment_name: cpu_inference_neural_compressor_gpt_sweep
diff --git a/tests/configs/cpu_inference_onnxruntime_lm_sweep.yaml b/tests/configs/cpu_inference_onnxruntime_gpt_sweep.yaml
similarity index 71%
rename from tests/configs/cpu_inference_onnxruntime_lm_sweep.yaml
rename to tests/configs/cpu_inference_onnxruntime_gpt_sweep.yaml
index 6e3c214c..a958bb55 100644
--- a/tests/configs/cpu_inference_onnxruntime_lm_sweep.yaml
+++ b/tests/configs/cpu_inference_onnxruntime_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- - _lm_sweep_ # inherits from gpt_sweep config
+ - _gpt_sweep_ # inherits from gpt_sweep config
- _cpu_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cpu_inference_onnxruntime_lm_sweep
+experiment_name: cpu_inference_onnxruntime_gpt_sweep
diff --git a/tests/configs/cpu_inference_openvino_lm_sweep.yaml b/tests/configs/cpu_inference_openvino_gpt_sweep.yaml
similarity index 71%
rename from tests/configs/cpu_inference_openvino_lm_sweep.yaml
rename to tests/configs/cpu_inference_openvino_gpt_sweep.yaml
index 8389d7b3..486f9e8f 100644
--- a/tests/configs/cpu_inference_openvino_lm_sweep.yaml
+++ b/tests/configs/cpu_inference_openvino_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- - _lm_sweep_ # inherits from gpt_sweep config
+ - _gpt_sweep_ # inherits from gpt_sweep config
- _cpu_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cpu_inference_openvino_lm_sweep
+experiment_name: cpu_inference_openvino_gpt_sweep
diff --git a/tests/configs/cpu_inference_pytorch_lm_sweep.yaml b/tests/configs/cpu_inference_pytorch_gpt_sweep.yaml
similarity index 72%
rename from tests/configs/cpu_inference_pytorch_lm_sweep.yaml
rename to tests/configs/cpu_inference_pytorch_gpt_sweep.yaml
index c30d7b60..b4720e88 100644
--- a/tests/configs/cpu_inference_pytorch_lm_sweep.yaml
+++ b/tests/configs/cpu_inference_pytorch_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- - _lm_sweep_ # inherits from gpt_sweep config
+ - _gpt_sweep_ # inherits from gpt_sweep config
- _cpu_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cpu_inference_pytorch_lm_sweep
+experiment_name: cpu_inference_pytorch_gpt_sweep
diff --git a/tests/configs/cpu_training_pytorch_lm_sweep.yaml b/tests/configs/cpu_training_pytorch_gpt_sweep.yaml
similarity index 72%
rename from tests/configs/cpu_training_pytorch_lm_sweep.yaml
rename to tests/configs/cpu_training_pytorch_gpt_sweep.yaml
index 8b3fbb83..5f8987b6 100644
--- a/tests/configs/cpu_training_pytorch_lm_sweep.yaml
+++ b/tests/configs/cpu_training_pytorch_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _training_ # inherits from training config
- - _lm_sweep_ # inherits from gpt_sweep config
+ - _gpt_sweep_ # inherits from gpt_sweep config
- _cpu_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cpu_training_pytorch_lm_sweep
+experiment_name: cpu_training_pytorch_gpt_sweep
diff --git a/tests/configs/cuda_inference_onnxruntime_lm_sweep.yaml b/tests/configs/cuda_inference_onnxruntime_gpt_sweep.yaml
similarity index 71%
rename from tests/configs/cuda_inference_onnxruntime_lm_sweep.yaml
rename to tests/configs/cuda_inference_onnxruntime_gpt_sweep.yaml
index e220b955..f9b38910 100644
--- a/tests/configs/cuda_inference_onnxruntime_lm_sweep.yaml
+++ b/tests/configs/cuda_inference_onnxruntime_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- - _lm_sweep_ # inherits from gpt_sweep config
+ - _gpt_sweep_ # inherits from gpt_sweep config
- _cuda_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cuda_inference_onnxruntime_lm_sweep
+experiment_name: cuda_inference_onnxruntime_gpt_sweep
diff --git a/tests/configs/cuda_inference_pytorch_lm_naive_mp.yaml b/tests/configs/cuda_inference_pytorch_gpt_naive_mp.yaml
similarity index 70%
rename from tests/configs/cuda_inference_pytorch_lm_naive_mp.yaml
rename to tests/configs/cuda_inference_pytorch_gpt_naive_mp.yaml
index a274429f..6e19ba18 100644
--- a/tests/configs/cuda_inference_pytorch_lm_naive_mp.yaml
+++ b/tests/configs/cuda_inference_pytorch_gpt_naive_mp.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- - _lm_naive_mp_ # inherits from lm naive mp config
+ - _gpt_naive_mp_ # inherits from gpt naive mp config
- _cuda_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cuda_inference_pytorch_lm_naive_mp
+experiment_name: cuda_inference_pytorch_gpt_naive_mp
diff --git a/tests/configs/cuda_inference_pytorch_lm_sweep.yaml b/tests/configs/cuda_inference_pytorch_gpt_sweep.yaml
similarity index 72%
rename from tests/configs/cuda_inference_pytorch_lm_sweep.yaml
rename to tests/configs/cuda_inference_pytorch_gpt_sweep.yaml
index 23b7ace2..8b033a67 100644
--- a/tests/configs/cuda_inference_pytorch_lm_sweep.yaml
+++ b/tests/configs/cuda_inference_pytorch_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- - _lm_sweep_ # inherits from gpt_sweep config
+ - _gpt_sweep_ # inherits from gpt_sweep config
- _cuda_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cpu_inference_pytorch_lm_sweep
+experiment_name: cuda_inference_pytorch_gpt_sweep
diff --git a/tests/configs/cuda_training_pytorch_lm_naive_mp.yaml b/tests/configs/cuda_training_pytorch_gpt_naive_mp.yaml
similarity index 70%
rename from tests/configs/cuda_training_pytorch_lm_naive_mp.yaml
rename to tests/configs/cuda_training_pytorch_gpt_naive_mp.yaml
index 714f8692..ab6d4bc2 100644
--- a/tests/configs/cuda_training_pytorch_lm_naive_mp.yaml
+++ b/tests/configs/cuda_training_pytorch_gpt_naive_mp.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _training_ # inherits from training config
- - _lm_naive_mp_ # inherits from lm naive mp config
+ - _gpt_naive_mp_ # inherits from gpt naive mp config
- _cuda_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cuda_training_pytorch_lm_naive_mp
+experiment_name: cuda_training_pytorch_gpt_naive_mp
diff --git a/tests/configs/cuda_training_pytorch_lm_peft.yaml b/tests/configs/cuda_training_pytorch_gpt_peft.yaml
similarity index 69%
rename from tests/configs/cuda_training_pytorch_lm_peft.yaml
rename to tests/configs/cuda_training_pytorch_gpt_peft.yaml
index be198ecc..1ee6f473 100644
--- a/tests/configs/cuda_training_pytorch_lm_peft.yaml
+++ b/tests/configs/cuda_training_pytorch_gpt_peft.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _training_ # inherits from training config
- - _lm_peft_ # inherits from language modeling peft config
+ - _gpt_peft_ # inherits from language modeling peft config
- _cuda_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cuda_training_pytorch_lm_peft
+experiment_name: cuda_training_pytorch_gpt_peft
diff --git a/tests/configs/cuda_training_pytorch_lm_sweep.yaml b/tests/configs/cuda_training_pytorch_gpt_sweep.yaml
similarity index 69%
rename from tests/configs/cuda_training_pytorch_lm_sweep.yaml
rename to tests/configs/cuda_training_pytorch_gpt_sweep.yaml
index 17fefe51..004f1f82 100644
--- a/tests/configs/cuda_training_pytorch_lm_sweep.yaml
+++ b/tests/configs/cuda_training_pytorch_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _training_ # inherits from training config
- - _lm_sweep_ # inherits from language modeling sweep config
+ - _gpt_sweep_ # inherits from language modeling sweep config
- _cuda_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cuda_training_pytorch_lm_sweep
+experiment_name: cuda_training_pytorch_gpt_sweep
diff --git a/tests/configs/cuda_training_torch_ort_lm_peft.yaml b/tests/configs/cuda_training_torch_ort_gpt_peft.yaml
similarity index 69%
rename from tests/configs/cuda_training_torch_ort_lm_peft.yaml
rename to tests/configs/cuda_training_torch_ort_gpt_peft.yaml
index 98e347a4..665dec16 100644
--- a/tests/configs/cuda_training_torch_ort_lm_peft.yaml
+++ b/tests/configs/cuda_training_torch_ort_gpt_peft.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _training_ # inherits from training config
- - _lm_peft_ # inherits from language modeling peft config
+ - _gpt_peft_ # inherits from language modeling peft config
- _cuda_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cuda_training_torch_ort_lm_peft
+experiment_name: cuda_training_torch_ort_gpt_peft
diff --git a/tests/configs/cuda_training_torch_ort_lm_sweep.yaml b/tests/configs/cuda_training_torch_ort_gpt_sweep.yaml
similarity index 69%
rename from tests/configs/cuda_training_torch_ort_lm_sweep.yaml
rename to tests/configs/cuda_training_torch_ort_gpt_sweep.yaml
index 25d4d054..ff8f505f 100644
--- a/tests/configs/cuda_training_torch_ort_lm_sweep.yaml
+++ b/tests/configs/cuda_training_torch_ort_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _training_ # inherits from inference config
- - _lm_sweep_ # inherits from language modeling sweep config
+ - _gpt_sweep_ # inherits from language modeling sweep config
- _cuda_ # inherits from cpu config
- _self_ # hydra 1.1 compatibility
-experiment_name: cuda_training_torch_ort_lm_sweep
+experiment_name: cuda_training_torch_ort_gpt_sweep
diff --git a/tests/configs/rocm_inference_onnxruntime_lm_sweep.yaml b/tests/configs/rocm_inference_onnxruntime_gpt_sweep.yaml
similarity index 71%
rename from tests/configs/rocm_inference_onnxruntime_lm_sweep.yaml
rename to tests/configs/rocm_inference_onnxruntime_gpt_sweep.yaml
index d6630ff1..f53b6612 100644
--- a/tests/configs/rocm_inference_onnxruntime_lm_sweep.yaml
+++ b/tests/configs/rocm_inference_onnxruntime_gpt_sweep.yaml
@@ -3,8 +3,8 @@ defaults:
# order of inheritance, last one overrides previous ones
- _base_ # inherits from base config
- _inference_ # inherits from inference config
- - _lm_sweep_ # inherits from gpt sweep config
+ - _gpt_sweep_ # inherits from gpt sweep config
- _rocm_ # inherits from rocm config
- _self_ # hydra 1.1 compatibility
-experiment_name: rocm_inference_onnxruntime_lm_sweep
+experiment_name: rocm_inference_onnxruntime_gpt_sweep
diff --git a/tests/test_api.py b/tests/test_api.py
index 0bf6ced9..30815d82 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -1,36 +1,28 @@
-from logging import getLogger
+import gc
import time
+from tempfile import TemporaryDirectory
-import torch
import pytest
+import torch
-from optimum_benchmark.trackers.memory import MemoryTracker
-from optimum_benchmark.trackers.latency import LatencyTracker
-from optimum_benchmark.experiment import ExperimentConfig, launch
-from optimum_benchmark.launchers.inline.config import InlineConfig
from optimum_benchmark.backends.pytorch.config import PyTorchConfig
-from optimum_benchmark.launchers.process.config import ProcessConfig
-from optimum_benchmark.launchers.torchrun.config import TorchrunConfig
-from optimum_benchmark.benchmarks.inference.config import INPUT_SHAPES
-from optimum_benchmark.benchmarks.training.config import DATASET_SHAPES
-from optimum_benchmark.generators.input_generator import InputGenerator
-from optimum_benchmark.benchmarks.training.config import TrainingConfig
-from optimum_benchmark.benchmarks.inference.config import InferenceConfig
-from optimum_benchmark.generators.dataset_generator import DatasetGenerator
-from optimum_benchmark.task_utils import TEXT_GENERATION_TASKS, IMAGE_DIFFUSION_TASKS
from optimum_benchmark.backends.timm_utils import extract_timm_shapes_from_config, get_timm_pretrained_config
from optimum_benchmark.backends.transformers_utils import (
extract_transformers_shapes_from_artifacts,
get_transformers_pretrained_config,
)
+from optimum_benchmark.benchmarks.inference.config import INPUT_SHAPES, InferenceConfig
+from optimum_benchmark.benchmarks.training.config import DATASET_SHAPES
+from optimum_benchmark.experiment import ExperimentConfig, launch
+from optimum_benchmark.generators.dataset_generator import DatasetGenerator
+from optimum_benchmark.generators.input_generator import InputGenerator
+from optimum_benchmark.launchers.inline.config import InlineConfig
+from optimum_benchmark.launchers.process.config import ProcessConfig
+from optimum_benchmark.launchers.torchrun.config import TorchrunConfig
+from optimum_benchmark.task_utils import IMAGE_DIFFUSION_TASKS, TEXT_GENERATION_TASKS
+from optimum_benchmark.trackers.latency import LatencyTracker
+from optimum_benchmark.trackers.memory import MemoryTracker
-
-LOGGER = getLogger("test-api")
-
-DEVICES_BACKENDS = [
- ("cpu", "none"),
- ("cuda", "pytorch"),
-]
LIBRARIES_TASKS_MODELS = [
("transformers", "fill-mask", "bert-base-uncased"),
("timm", "image-classification", "timm/resnet50.a1_in1k"),
@@ -43,18 +35,17 @@
("transformers", "image-classification", "google/vit-base-patch16-224"),
("transformers", "semantic-segmentation", "google/vit-base-patch16-224"),
]
-BENCHMARK_CONFIGS = [
- InferenceConfig(latency=True, memory=True),
- TrainingConfig(latency=True, memory=True),
-]
LAUNCHER_CONFIGS = [
- TorchrunConfig(nproc_per_node=2, device_isolation=False),
- ProcessConfig(device_isolation=False),
InlineConfig(device_isolation=False),
+ ProcessConfig(device_isolation=False),
+ TorchrunConfig(device_isolation=False, nproc_per_node=2),
]
+BACKENDS = ["pytorch", "none"]
+DEVICES = ["cpu", "cuda"]
-@pytest.mark.parametrize("device,backend", DEVICES_BACKENDS)
+@pytest.mark.parametrize("device", DEVICES)
+@pytest.mark.parametrize("backend", BACKENDS)
def test_api_latency_tracker(device, backend):
expected_latency = 1
tracker = LatencyTracker(device=device, backend=backend)
@@ -63,40 +54,55 @@ def test_api_latency_tracker(device, backend):
with tracker.track():
time.sleep(1)
- latencies_list = tracker.get_latencies_list()
+ latency = tracker.get_latency()
+ latency.log()
- assert len(latencies_list) == 2
- assert latencies_list[0] > expected_latency * 0.9
- assert latencies_list[0] < expected_latency * 1.1
+ assert latency.mean < expected_latency * 1.1
+ assert latency.mean > expected_latency * 0.9
-@pytest.mark.parametrize("device,backend", DEVICES_BACKENDS)
+@pytest.mark.parametrize("device", DEVICES)
+@pytest.mark.parametrize("backend", BACKENDS)
def test_api_memory_tracker(device, backend):
tracker = MemoryTracker(device=device, backend=backend)
+ tracker.reset()
with tracker.track():
+ time.sleep(1)
pass
# the process consumes memory that we can't control
- if backend == "pytorch":
- initial_process_memory = tracker.get_max_memory_allocated_mb()
- else:
- initial_process_memory = tracker.get_max_memory_used_mb()
+ initial_memory = tracker.get_max_memory()
+ initial_memory.log()
+ tracker.reset()
with tracker.track():
- array = torch.ones((10000, 10000), dtype=torch.float64, device=device)
- expected_memory = array.nbytes / 1e6 # around 800 MB
-
- if backend == "pytorch":
- final_process_memory = tracker.get_max_memory_allocated_mb()
+ time.sleep(1)
+ array = torch.randn((10000, 10000), dtype=torch.float64, device=device)
+ expected_memory = array.nbytes / 1e6
+ time.sleep(1)
+
+ final_memory = tracker.get_max_memory()
+ final_memory.log()
+
+ if device == "cuda":
+ if backend == "pytorch":
+ measured_memory = final_memory.max_allocated - initial_memory.max_allocated
+ else:
+ measured_memory = final_memory.max_vram - initial_memory.max_vram
+ if torch.version.hip is not None:
+ return # skip vram measurement for ROCm
else:
- final_process_memory = tracker.get_max_memory_used_mb()
-
- measured_memory = final_process_memory - initial_process_memory
+ measured_memory = final_memory.max_ram - initial_memory.max_ram
assert measured_memory < expected_memory * 1.1
assert measured_memory > expected_memory * 0.9
+ del array
+ if torch.cuda.is_available():
+ torch.cuda.empty_cache()
+ gc.collect()
+
@pytest.mark.parametrize("library,task,model", LIBRARIES_TASKS_MODELS)
def test_api_input_generator(library, task, model):
@@ -109,11 +115,7 @@ def test_api_input_generator(library, task, model):
else:
raise ValueError(f"Unknown library {library}")
- generator = InputGenerator(
- task=task,
- input_shapes=INPUT_SHAPES,
- model_shapes=model_shapes,
- )
+ generator = InputGenerator(task=task, input_shapes=INPUT_SHAPES, model_shapes=model_shapes)
if task in TEXT_GENERATION_TASKS:
_ = generator(mode="forward")
@@ -135,23 +137,31 @@ def test_api_dataset_generator(library, task, model):
else:
raise ValueError(f"Unknown library {library}")
- generator = DatasetGenerator(
- task=task,
- dataset_shapes=DATASET_SHAPES,
- model_shapes=model_shapes,
- )
+ generator = DatasetGenerator(task=task, dataset_shapes=DATASET_SHAPES, model_shapes=model_shapes)
_ = generator()
-@pytest.mark.parametrize("benchmark_config", BENCHMARK_CONFIGS)
@pytest.mark.parametrize("launcher_config", LAUNCHER_CONFIGS)
-def test_api_launch_cpu(benchmark_config, launcher_config):
- backend_config = PyTorchConfig(model="bert-base-uncased", no_weights=True, device="cpu")
+@pytest.mark.parametrize("device", DEVICES)
+def test_api_launch(launcher_config, device):
+ benchmark_config = InferenceConfig(latency=True, memory=True)
+ device_ids = ",".join(str(i) for i in range(torch.cuda.device_count())) if device == "cuda" else None
+ backend_config = PyTorchConfig(model="bert-base-uncased", device_ids=device_ids, no_weights=True, device=device)
experiment_config = ExperimentConfig(
- experiment_name="",
- benchmark=benchmark_config,
- launcher=launcher_config,
- backend=backend_config,
+ experiment_name="api-experiment", benchmark=benchmark_config, launcher=launcher_config, backend=backend_config
)
- _ = launch(experiment_config)
+ benchmark_report = launch(experiment_config)
+
+ with TemporaryDirectory() as tempdir:
+ experiment_config.to_dict()
+ experiment_config.to_flat_dict()
+ experiment_config.to_dataframe()
+ experiment_config.to_csv(f"{tempdir}/experiment_config.csv")
+ experiment_config.to_json(f"{tempdir}/experiment_config.json")
+
+ benchmark_report.to_dict()
+ benchmark_report.to_flat_dict()
+ benchmark_report.to_dataframe()
+ benchmark_report.to_csv(f"{tempdir}/benchmark_report.csv")
+ benchmark_report.to_json(f"{tempdir}/benchmark_report.json")
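For reviewers, a minimal standalone sketch (not part of the patch) of the reworked tracker API as exercised by the updated test above. It only uses the calls visible in this diff (LatencyTracker.track/get_latency, MemoryTracker.reset/track/get_max_memory, and the latency.mean / memory.max_ram attributes); everything else is illustrative.

import time

import torch

from optimum_benchmark.trackers.latency import LatencyTracker
from optimum_benchmark.trackers.memory import MemoryTracker

# latency: each track() context records one measurement, aggregated by get_latency()
latency_tracker = LatencyTracker(device="cpu", backend="none")
for _ in range(2):
    with latency_tracker.track():
        time.sleep(1)
latency = latency_tracker.get_latency()
latency.log()        # logs the aggregated statistics
print(latency.mean)  # ~1 second per tracked region

# memory: reset() clears previous peaks, get_max_memory() returns the peak usage
memory_tracker = MemoryTracker(device="cpu", backend="none")
memory_tracker.reset()
with memory_tracker.track():
    array = torch.randn((10000, 10000), dtype=torch.float64)  # ~800 MB
memory = memory_tracker.get_max_memory()
memory.log()
print(memory.max_ram)  # peak RAM in MB (max_allocated / max_vram on CUDA)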
diff --git a/tests/test_cli.py b/tests/test_cli.py
index afae3609..739d0f89 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -11,20 +11,13 @@
TEST_CONFIG_NAMES = [
config.split(".")[0]
for config in os.listdir(TEST_CONFIG_DIR)
- if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_"))
+ if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) # or "ds_tp" in config)
]
@pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES)
def test_cli_configs(config_name):
- args = [
- "optimum-benchmark",
- "--config-dir",
- TEST_CONFIG_DIR,
- "--config-name",
- config_name,
- "--multirun",
- ]
+ args = ["optimum-benchmark", "--config-dir", TEST_CONFIG_DIR, "--config-name", config_name, "--multirun"]
popen = run_subprocess_and_log_stream_output(LOGGER, args)
assert popen.returncode == 0, f"Failed to run {config_name}"
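Also for illustration, a condensed version of the Python-API flow that test_api_launch now covers; the config classes and the launch()/report methods are those imported in tests/test_api.py above, while the experiment name and file paths are arbitrary.

from tempfile import TemporaryDirectory

from optimum_benchmark.backends.pytorch.config import PyTorchConfig
from optimum_benchmark.benchmarks.inference.config import InferenceConfig
from optimum_benchmark.experiment import ExperimentConfig, launch
from optimum_benchmark.launchers.process.config import ProcessConfig

# compose an experiment from a benchmark, a launcher and a backend config
benchmark_config = InferenceConfig(latency=True, memory=True)
launcher_config = ProcessConfig(device_isolation=False)
backend_config = PyTorchConfig(model="bert-base-uncased", no_weights=True, device="cpu")
experiment_config = ExperimentConfig(
    experiment_name="api-experiment",
    benchmark=benchmark_config,
    launcher=launcher_config,
    backend=backend_config,
)

# launch() now returns a benchmark report that serializes like the experiment config
benchmark_report = launch(experiment_config)
with TemporaryDirectory() as tempdir:
    benchmark_report.to_dataframe()
    benchmark_report.to_json(f"{tempdir}/benchmark_report.json")
    experiment_config.to_csv(f"{tempdir}/experiment_config.csv")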