update readme (#201)
IlyasMoutawwakil authored May 16, 2024
1 parent 6fcf23d commit 6351381
Showing 4 changed files with 46 additions and 5 deletions.
5 changes: 4 additions & 1 deletion Makefile
@@ -1,5 +1,5 @@
# List of targets that are not associated with files
-.PHONY: quality style install build_cpu_image build_cuda_118_image build_cuda_121_image build_rocm_image run_cpu_container run_cuda_118_container run_cuda_121_container run_rocm_container install_api_misc install_api_cpu install_api_cuda install_api_rocm install_cli_misc install_cli_cpu_pytorch install_cli_cpu_openvino install_cli_cpu_onnxruntime install_cli_cpu_neural_compressor install_cli_cuda_pytorch install_cli_rocm_pytorch install_cli_cuda_torch_ort install_cli_cuda_onnxruntime test_api_misc test_api_cpu test_api_cuda test_api_rocm test_cli_misc test_cli_cpu_pytorch test_cli_cpu_openvino test_cli_cpu_onnxruntime test_cli_cpu_neural_compressor test_cli_cuda_onnxruntime test_cli_cuda_pytorch_multi_gpu test_cli_cuda_pytorch_single_gpu test_cli_cuda_torch_ort_multi_gpu test_cli_cuda_torch_ort_single_gpu test_cli_rocm_pytorch_multi_gpu test_cli_rocm_pytorch_single_gpu install_llm_perf_cuda_pytorch run_llm_perf_cuda_pytorch_unquantized run_llm_perf_cuda_pytorch_bnb run_llm_perf_cuda_pytorch_gptq run_llm_perf_cuda_pytorch_awq
+.PHONY: quality style install build_cpu_image build_cuda_image build_cuda_ort_image build_rocm_image run_cpu_container run_cuda_container run_cuda_ort_container run_rocm_container install_api_misc install_api_cpu install_api_cuda install_api_rocm install_cli_misc install_cli_cpu_pytorch install_cli_cpu_openvino install_cli_cpu_onnxruntime install_cli_cpu_neural_compressor install_cli_cuda_pytorch install_cli_rocm_pytorch install_cli_cuda_torch_ort install_cli_cuda_onnxruntime test_api_misc test_api_cpu test_api_cuda test_api_rocm test_cli_misc test_cli_cpu_pytorch test_cli_cpu_openvino test_cli_cpu_onnxruntime test_cli_cpu_neural_compressor test_cli_cuda_onnxruntime test_cli_cuda_vllm test_cli_cuda_pytorch_multi_gpu test_cli_cuda_pytorch_single_gpu test_cli_cuda_torch_ort_multi_gpu test_cli_cuda_torch_ort_single_gpu test_cli_rocm_pytorch_multi_gpu test_cli_rocm_pytorch_single_gpu install_llm_perf_cuda_pytorch run_llm_perf_cuda_pytorch_unquantized run_llm_perf_cuda_pytorch_bnb run_llm_perf_cuda_pytorch_gptq run_llm_perf_cuda_pytorch_awq

PWD := $(shell pwd)
USER_ID := $(shell id -u)
@@ -151,6 +151,9 @@ test_cli_cpu_neural_compressor:
test_cli_cuda_onnxruntime:
pytest -s -k "cli and cuda and onnxruntime"

test_cli_cuda_vllm:
pytest -s -k "cli and cuda and vllm"
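This target relies on pytest's `-k` expression, which matches substrings of each collected test id. A minimal sketch (a hypothetical test file, not this repository's actual suite) of the kind of test it selects:

```python
import pytest

# `-k "cli and cuda and vllm"` requires all three substrings to appear in the
# test id, so "test_cli[cuda_vllm]" matches while "test_cli[cuda_pytorch]" does not.
@pytest.mark.parametrize("config_name", ["cuda_vllm", "cuda_pytorch", "cpu_openvino"])
def test_cli(config_name):
    # the real suite would launch a benchmark per config; a no-op stands in here
    assert config_name
```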

test_cli_cuda_pytorch_multi_gpu:
pytest -s -k "cli and cuda and pytorch and (dp or ddp or device_map or deepspeed) and not awq"

42 changes: 39 additions & 3 deletions README.md
@@ -41,6 +41,7 @@ Optimum-Benchmark is continuously and intensively tested on a variety of devices
[![CLI_CPU_PYTORCH](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_pytorch.yaml)
[![CLI_CPU_PY_TXI](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_py_txi.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cpu_py_txi.yaml)
[![CLI_CUDA_ONNXRUNTIME](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_onnxruntime.yaml)
[![CLI_CUDA_VLLM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_vllm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_vllm.yaml)
[![CLI_CUDA_PYTORCH_MULTI_GPU](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_pytorch_multi_gpu.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_pytorch_multi_gpu.yaml)
[![CLI_CUDA_PYTORCH_SINGLE_GPU](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_pytorch_single_gpu.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_pytorch_single_gpu.yaml)
[![CLI_CUDA_TENSORRT_LLM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_tensorrt_llm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_tensorrt_llm.yaml)
@@ -81,12 +82,28 @@ Depending on the backends you want to use, you can install `optimum-benchmark` w
- OnnxRuntime-GPU: `pip install optimum-benchmark[onnxruntime-gpu]`
- Neural Compressor: `pip install optimum-benchmark[neural-compressor]`
- Py-TXI: `pip install optimum-benchmark[py-txi]`
- vLLM: `pip install optimum-benchmark[vllm]`

We also support the following extra dependencies:

- autoawq
- auto-gptq
- autoawq-rocm
- auto-gptq-rocm
- sentence-transformers
- bitsandbytes
- codecarbon
- flash-attn
- deepspeed
- diffusers
- timm
- peft

</details>

### Running benchmarks using the Python API 🧪

-You can run benchmarks from the Python API, using the `Benchmark` class and its `launch` method. It takes a `BenchmarkConfig` object as input, runs the benchmark in isolated process and returns a `BenchmarkReport` object containing the benchmark results.
+You can run benchmarks from the Python API, using the `Benchmark` class and its `launch` method. It takes a `BenchmarkConfig` object as input, runs the benchmark in an isolated process and returns a `BenchmarkReport` object containing the benchmark results.

Here's an example of how to run an isolated benchmark using the `pytorch` backend, `torchrun` launcher and `inference` scenario with latency and memory tracking enabled.
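The opening of that example is collapsed in this diff view; a minimal sketch of the configuration it builds follows. `PyTorchConfig` is referenced elsewhere in this README, while `TorchrunConfig`, `InferenceConfig`, their parameters, and the top-level import path are assumptions here:

```python
from optimum_benchmark import Benchmark, BenchmarkConfig, TorchrunConfig, InferenceConfig, PyTorchConfig

if __name__ == "__main__":
    benchmark_config = BenchmarkConfig(
        name="pytorch_gpt2",
        launcher=TorchrunConfig(nproc_per_node=2),            # torchrun launcher (assumed parameter)
        scenario=InferenceConfig(latency=True, memory=True),  # latency and memory tracking
        backend=PyTorchConfig(model="gpt2", device="cuda"),   # pytorch backend
    )
    benchmark_report = Benchmark.launch(benchmark_config)
```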

@@ -108,13 +125,28 @@ if __name__ == "__main__":
)
benchmark_report = Benchmark.launch(benchmark_config)

# log the benchmark report in the terminal
benchmark_report.log() # or print(benchmark_report)

# convert artifacts to a dictionary or dataframe
benchmark_config.to_dict() # or benchmark_config.to_dataframe()

# save artifacts to disk as json or csv files
benchmark_report.save_csv("benchmark_report.csv") # or benchmark_report.save_json("benchmark_report.json")

# push artifacts to the hub
-benchmark_config.push_to_hub("IlyasMoutawwakil/pytorch_gpt2")
-benchmark_report.push_to_hub("IlyasMoutawwakil/pytorch_gpt2")
+benchmark_config.push_to_hub("IlyasMoutawwakil/pytorch_gpt2") # or benchmark_report.push_to_hub("IlyasMoutawwakil/pytorch_gpt2")

# or merge them into a single artifact
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
benchmark.save_json("benchmark.json") # or benchmark.save_csv("benchmark.csv")
benchmark.push_to_hub("IlyasMoutawwakil/pytorch_gpt2")

# load artifacts from the hub
benchmark = Benchmark.from_hub("IlyasMoutawwakil/pytorch_gpt2")

# or load them from disk
benchmark = Benchmark.load_json("benchmark.json") # or Benchmark.load_csv("benchmark.csv")
```

If you're using VSCode, you can hover over the configuration classes to see the available parameters and their descriptions. They are also listed in the [Features](#features-) section below.
@@ -230,6 +262,9 @@ See [TrainingConfig](optimum_benchmark/scenarios/training/config.py) for more in
- [x] Torch-ORT backend for CUDA (`backend=torch-ort`, `backend.device=cuda`)
- [x] OpenVINO backend for CPU (`backend=openvino`, `backend.device=cpu`)
- [x] OpenVINO backend for GPU (`backend=openvino`, `backend.device=gpu`)
- [x] vLLM backend for CUDA (`backend=vllm`, `backend.device=cuda`) (see the sketch after this list)
- [x] vLLM backend for ROCm (`backend=vllm`, `backend.device=rocm`)
- [x] vLLM backend for CPU (`backend=vllm`, `backend.device=cpu`)
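
As a sketch of selecting this backend from the Python API (assuming `VLLMConfig` is importable from the top level like the other backend configs listed further down, with `model` and `device` parameters):

```python
from optimum_benchmark import VLLMConfig  # import path is an assumption

# equivalent of the `backend=vllm`, `backend.device=cuda` overrides above;
# this plugs into BenchmarkConfig(backend=...) as in the Python API example
backend_config = VLLMConfig(model="gpt2", device="cuda")
```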

<details>
<summary>General backend features 🧰</summary>
@@ -246,6 +281,7 @@ See [TrainingConfig](optimum_benchmark/scenarios/training/config.py) for more in

For more information on the features of each backend, you can check their respective configuration files:

- [VLLMConfig](optimum_benchmark/backends/vllm/config.py)
- [OVConfig](optimum_benchmark/backends/openvino/config.py)
- [PyTXIConfig](optimum_benchmark/backends/py_txi/config.py)
- [PyTorchConfig](optimum_benchmark/backends/pytorch/config.py)
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -12,4 +12,5 @@ skip-magic-trailing-comma = false
[tool.pytest.ini_options]
log_cli = true
log_cli_level = "INFO"
log_cli_format = "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s"
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
log_cli_format = "[PYTEST-PROCESS][%(asctime)s][%(name)s][%(levelname)s] - %(message)s"
1 change: 1 addition & 0 deletions tests/test_cli.py
@@ -7,6 +7,7 @@

LOGGER = getLogger("test")


TEST_CONFIG_DIR = "/".join(__file__.split("/")[:-1] + ["configs"])
TEST_CONFIG_NAMES = [
config.split(".")[0]
