From 0b24af9d7b7751f74b160dfade73ef78e10964d6 Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Fri, 17 May 2024 11:28:02 +0200
Subject: [PATCH] Release (#209)
---
.github/workflows/test_cli_misc.yaml | 4 +---
.../workflows/update_llm_perf_cuda_pytorch.yaml | 3 +--
README.md | 13 ++++++++++---
llm_perf/utils.py | 8 ++++----
optimum_benchmark/version.py | 2 +-
setup.py | 17 ++++++++---------
6 files changed, 25 insertions(+), 22 deletions(-)
diff --git a/.github/workflows/test_cli_misc.yaml b/.github/workflows/test_cli_misc.yaml
index 2a6dd666..bd7a2912 100644
--- a/.github/workflows/test_cli_misc.yaml
+++ b/.github/workflows/test_cli_misc.yaml
@@ -8,7 +8,6 @@ on:
paths:
- .github/workflows/test_cli_misc.yaml
- "optimum_benchmark/**"
- - "docker/**"
- "tests/**"
- "setup.py"
pull_request:
@@ -17,7 +16,6 @@ on:
paths:
- .github/workflows/test_cli_misc.yaml
- "optimum_benchmark/**"
- - "docker/**"
- "tests/**"
- "setup.py"
@@ -31,7 +29,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
- python: ["3.8", "3.10"]
+ python: ["3.8", "3.9", "3.10"]
runs-on: ${{ matrix.os }}
diff --git a/.github/workflows/update_llm_perf_cuda_pytorch.yaml b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
index dd023fb2..915ef08d 100644
--- a/.github/workflows/update_llm_perf_cuda_pytorch.yaml
+++ b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
@@ -29,7 +29,6 @@ jobs:
- name: Run benchmarks
uses: addnab/docker-run-action@v3
env:
- IMAGE: ${{ env.IMAGE }}
SUBSET: ${{ matrix.subset }}
MACHINE: ${{ matrix.machine.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -49,5 +48,5 @@ jobs:
run: |
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
pip install -U transformers huggingface_hub[hf_transfer]
- pip install -e .
+ pip install optimum-benchmark
python llm_perf/update_llm_perf_cuda_pytorch.py
diff --git a/README.md b/README.md
index b1face7b..b83cbde6 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,22 @@
-
+
All benchmarks are wrong, some will cost you less than others.
Optimum-Benchmark 🏋️
+[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - Version](https://img.shields.io/pypi/v/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - Downloads](https://img.shields.io/pypi/dm/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - Implementation](https://img.shields.io/pypi/implementation/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - Format](https://img.shields.io/pypi/format/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+[![PyPI - License](https://img.shields.io/pypi/l/optimum-benchmark)](https://pypi.org/project/optimum-benchmark/)
+
Optimum-Benchmark is a unified [multi-backend & multi-device](#backends--devices-) utility for benchmarking [Transformers](https://github.com/huggingface/transformers), [Diffusers](https://github.com/huggingface/diffusers), [PEFT](https://github.com/huggingface/peft), [TIMM](https://github.com/huggingface/pytorch-image-models) and [Optimum](https://github.com/huggingface/optimum) libraries, along with all their supported [optimizations & quantization schemes](#backends--devices-), for [inference & training](#scenarios-), in [distributed & non-distributed settings](#launchers-), in the most correct, efficient and scalable way possible.
*News* 📰
-- PyPI package is now available for installation: `pip install optimum-benchmark` 🎉 check it out !
+- PyPI package is now available for installation: `pip install optimum-benchmark` 🎉 [check it out](https://pypi.org/project/optimum-benchmark/) !
- Hosted 4 minimal docker images (`cpu`, `cuda`, `rocm`, `cuda-ort`) in [packages](https://github.com/huggingface/optimum-benchmark/pkgs/container/optimum-benchmark) for testing, benchmarking and reproducibility 🐳
- Added vLLM backend for benchmarking [vLLM](https://github.com/vllm-project/vllm)'s inference engine 🚀
-- Hosted the codebase of the LLM-Perf Leaderboard [LLM-Perf](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) 🥇
+- Hosted the codebase of the [LLM-Perf Leaderboard](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) 🥇
- Added Py-TXI backend for benchmarking [Py-TXI](https://github.com/IlyasMoutawwakil/py-txi/tree/main) 🚀
- Introduced a Python API for running isolated benchmarks from the comfort of your Python scripts 🐍
- Simplified the CLI interface for running benchmarks using the Hydra CLI 🧪
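The Python API called out in the news item above deserves a concrete illustration. A minimal sketch, assuming the 0.2.x top-level exports (`Benchmark`, `BenchmarkConfig`, `ProcessConfig`, `InferenceConfig`, `PyTorchConfig`) and the `launcher`/`scenario`/`backend` keyword names; these are taken from this release's README as best recalled and may differ in other versions:

```python
# Minimal sketch of the Python API advertised in the README above.
# Class and keyword names (Benchmark, BenchmarkConfig, ProcessConfig,
# InferenceConfig, PyTorchConfig, launcher/scenario/backend) are assumed
# from the 0.2.x release and may differ elsewhere.
from optimum_benchmark import (
    Benchmark,
    BenchmarkConfig,
    InferenceConfig,
    ProcessConfig,
    PyTorchConfig,
)

if __name__ == "__main__":
    benchmark_config = BenchmarkConfig(
        name="pytorch_gpt2",
        launcher=ProcessConfig(),  # run the benchmark in an isolated child process
        scenario=InferenceConfig(latency=True, memory=True),
        backend=PyTorchConfig(model="gpt2", device="cpu", no_weights=True),
    )
    benchmark_report = Benchmark.launch(benchmark_config)
    print(benchmark_report)
```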
diff --git a/llm_perf/utils.py b/llm_perf/utils.py
index e51d8065..1eea17b0 100644
--- a/llm_perf/utils.py
+++ b/llm_perf/utils.py
@@ -4,13 +4,11 @@
from optimum_benchmark.report import BenchmarkReport
-OPEN_LLM_LEADERBOARD = pd.read_csv("hf://datasets/optimum-benchmark/open-llm-leaderboard/open-llm-leaderboard.csv")
-
-
INPUT_SHAPES = {"batch_size": 1, "sequence_length": 256}
GENERATE_KWARGS = {"max_new_tokens": 64, "min_new_tokens": 64}
+OPEN_LLM_LEADERBOARD = pd.read_csv("hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-df.csv")
OPEN_LLM_LIST = OPEN_LLM_LEADERBOARD.drop_duplicates(subset=["Model"])["Model"].tolist()
PRETRAINED_OPEN_LLM_LIST = (
OPEN_LLM_LEADERBOARD[OPEN_LLM_LEADERBOARD["Type"] == "pretrained"]
@@ -44,7 +42,9 @@
# "Qwen",
# ],
# ]
-# CANONICAL_PRETRAINED_OPEN_LLM_LIST = [model for model in PRETRAINED_OPEN_LLM_LIST if model.split("/")[0] in CANONICAL_ORGANIZATIONS]
+# CANONICAL_PRETRAINED_OPEN_LLM_LIST = [
+# model for model in PRETRAINED_OPEN_LLM_LIST if model.split("/")[0] in CANONICAL_ORGANIZATIONS
+# ]
CANONICAL_PRETRAINED_OPEN_LLM_LIST = [
"01-ai/Yi-34B",
"01-ai/Yi-6B",
diff --git a/optimum_benchmark/version.py b/optimum_benchmark/version.py
index d9aa7d8b..0b959d42 100644
--- a/optimum_benchmark/version.py
+++ b/optimum_benchmark/version.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-__version__ = "0.2.0"
+__version__ = "0.2.1"
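A quick way to confirm the bump took effect in an environment, assuming the package is installed from this tree or the matching 0.2.1 wheel (`importlib.metadata` is in the stdlib on every Python version this project supports, 3.8+):

```python
# Confirm the version bump: the module constant and the installed
# distribution metadata should both report 0.2.1 after this patch.
from importlib.metadata import version

from optimum_benchmark.version import __version__

assert __version__ == version("optimum-benchmark") == "0.2.1"
print("optimum-benchmark", __version__)
```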
diff --git a/setup.py b/setup.py
index 900ce99b..b024a738 100644
--- a/setup.py
+++ b/setup.py
@@ -98,26 +98,25 @@
extras_require=EXTRAS_REQUIRE,
entry_points={"console_scripts": ["optimum-benchmark=optimum_benchmark.cli:main"]},
description="Optimum-Benchmark is a unified multi-backend utility for benchmarking "
- "Transformers, Timm, Diffusers and Sentence-Transformers with full support of Optimum's "
- "hardware optimizations & quantization schemes.",
- long_description=open("README.md", "r", encoding="utf-8").read(),
- long_description_content_type="text/markdown",
+ "Transformers, Timm, Diffusers and Sentence-Transformers with full support of "
+ "Optimum's hardware optimizations & quantization schemes.",
+ url="https://github.com/huggingface/optimum-benchmark",
classifiers=[
- "License :: OSI Approved :: Apache Software License",
- "Intended Audience :: Developers",
"Intended Audience :: Education",
+ "Intended Audience :: Developers",
+ "Operating System :: POSIX :: Linux",
"Intended Audience :: Science/Research",
- "Operating System :: OS Independent",
- "Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
+ "License :: OSI Approved :: Apache Software License",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
keywords="benchmaek, transformers, quantization, pruning, optimization, training, inference, onnx, onnx runtime, intel, "
"habana, graphcore, neural compressor, ipex, ipu, hpu, llm-swarm, py-txi, vllm, auto-gptq, autoawq, "
"sentence-transformers, bitsandbytes, codecarbon, flash-attn, deepspeed, diffusers, timm, peft",
- url="https://github.com/huggingface/optimum-benchmark",
+ long_description=open("README.md", "r", encoding="utf-8").read(),
+ long_description_content_type="text/markdown",
author="HuggingFace Inc. Special Ops Team",
include_package_data=True,
name="optimum-benchmark",