Commit: Update llm perf (#195)
IlyasMoutawwakil authored May 9, 2024
1 parent 2e77e02 commit d35829e
Showing 9 changed files with 311 additions and 119 deletions.
@@ -1,17 +1,16 @@
name: LLM Perf Benchmarks - CUDA PyTorch
name: Update LLM Perf Benchmarks - CUDA PyTorch

on:
workflow_dispatch:
schedule:
# Every day at 00:00 UTC
- cron: "0 0 * * *"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
build_image_and_run_cuda_pytorch:
build_image_and_run_benchmarks:
strategy:
fail-fast: false
matrix:
@@ -53,7 +52,7 @@ jobs:
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
pip install -U transformers huggingface_hub[hf_transfer]
pip install -e .[codecarbon]
python llm_perf/benchmark_cuda_pytorch.py
pip install -e .
python llm_perf/update_llm_perf_cuda_pytorch.py
33 changes: 33 additions & 0 deletions .github/workflows/update_open_llm_leaderboard.yaml
@@ -0,0 +1,33 @@
name: Update Open LLM Leaderboard

on:
workflow_dispatch:
schedule:
- cron: "0 0 * * *"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
update_open_llm_leaderboard:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"

- name: Install requirements
run: |
pip install --upgrade pip
pip install pandas huggingface-hub
- name: Update Open LLM Leaderboard
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
python llm_perf/update_open_llm_leaderboard.py
4 changes: 2 additions & 2 deletions Makefile
@@ -173,9 +173,9 @@ test_cli_rocm_pytorch_single_gpu:
# llm-perf

install_llm_perf_cuda_pytorch:
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
pip install -U transformers huggingface_hub[hf_transfer]
pip install -e .[codecarbon]
pip install -e .

run_llm_perf_cuda_pytorch_unquantized:
SUBSET=unquantized python llm_perf/benchmark_cuda_pytorch.py
36 changes: 0 additions & 36 deletions llm_perf/constants.py

This file was deleted.

@@ -2,21 +2,32 @@
from itertools import product
from logging import getLogger

from llm_perf.constants import CANONICAL_MODELS_LIST, GENERATE_KWARGS, INPUT_SHAPES, PRETRAINED_MODELS_LIST
from llm_perf.utils import common_errors_reporter, is_experiment_conducted, is_experiment_not_supported
from optimum_benchmark.backends.pytorch.config import PyTorchConfig
from optimum_benchmark.benchmarks.inference.config import InferenceConfig
from optimum_benchmark.experiment import ExperimentConfig, launch
from optimum_benchmark.launchers.process.config import ProcessConfig
from llm_perf.utils import (
CANONICAL_PRETRAINED_OPEN_LLM_LIST,
GENERATE_KWARGS,
INPUT_SHAPES,
OPEN_LLM_LIST,
PRETRAINED_OPEN_LLM_LIST,
errors_handler,
is_benchmark_conducted,
is_benchmark_supported,
)
from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

CWD = os.getcwd()
MACHINE = os.getenv("MACHINE", "1xA100")
SUBSET = os.getenv("SUBSET", "unquantized")
CANONICAL_MODELS_ONLY = os.getenv("CANONICAL_MODELS_ONLY", "1") == "1"
PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
SUBSET = os.getenv("SUBSET", None)
MACHINE = os.getenv("MACHINE", None)


if os.getenv("MACHINE", None) is None and os.getenv("SUBSET", None) is None:
PUSH_REPO_ID = "optimum-benchmark/llm-perf-pytorch-cuda-debug"
CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"]
SUBSET = "unquantized"
elif os.getenv("MACHINE", None) is not None and os.getenv("SUBSET", None) is not None:
PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
else:
raise ValueError("Either both MACHINE and SUBSET should be set for benchmarking or neither for debugging")

ATTENTION_COFIGS = ["eager", "sdpa", "flash_attention_2"]
if SUBSET == "unquantized":
WEIGHTS_CONFIGS = {
@@ -79,25 +90,26 @@
}


setup_logging()
LOGGER = getLogger("llm-perf-backend")
LOGGER.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}")
LOGGER.info(f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}")
LOGGER.info(f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}")


def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
benchmark_name = f"{weights_config}-{attn_implementation}"
subfolder = f"{benchmark_name}/{model.replace('/', '--')}"

torch_dtype = WEIGHTS_CONFIGS[weights_config]["torch_dtype"]
quant_scheme = WEIGHTS_CONFIGS[weights_config]["quant_scheme"]
quant_config = WEIGHTS_CONFIGS[weights_config]["quant_config"]

if is_experiment_not_supported(torch_dtype, attn_implementation):
LOGGER.info(f"Skipping experiment with model {model} since it is not supported")
if not is_benchmark_supported(weights_config, attn_implementation):
LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported")
return

launcher_config = ProcessConfig(
start_method="spawn",
device_isolation=True,
device_isolation_action="error",
)
benchmark_config = InferenceConfig(
launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill")
scenario_config = InferenceConfig(
memory=True,
energy=True,
latency=True,
@@ -118,40 +130,53 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
quantization_scheme=quant_scheme,
quantization_config=quant_config,
attn_implementation=attn_implementation,
hub_kwargs={"trust_remote_code": True},
)

experiment_name = f"{weights_config}-{attn_implementation}"
subfolder = f"{experiment_name}/{model.replace('/', '--')}"

experiment_config = ExperimentConfig(
experiment_name=experiment_name,
benchmark=benchmark_config,
launcher=launcher_config,
backend=backend_config,
benchmark_config = BenchmarkConfig(
name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config
)

if is_experiment_conducted(experiment_config, PUSH_REPO_ID, subfolder):
LOGGER.info(f"Skipping experiment {experiment_name} with model {model} since it was already conducted")
if is_benchmark_conducted(benchmark_config, PUSH_REPO_ID, subfolder):
LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
return

experiment_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
benchmark_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)

try:
benchmark_report = launch(experiment_config)
LOGGER.info(f"Running benchmark {benchmark_name} with model {model}")
benchmark_report = Benchmark.launch(benchmark_config)
benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
benchmark.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)

except Exception as error:
os.chdir(CWD)  # TODO: figure out why this is happening
LOGGER.error(f"Experiment {experiment_name} failed with model {model}")
common_errors_reporter(error, LOGGER, subfolder, PUSH_REPO_ID)
LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}")
valid_error, benchmark_report = errors_handler(str(error))

if valid_error:
LOGGER.error("The error is a valid one, reporting it")
LOGGER.error(benchmark_report.error)
benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
else:
LOGGER.error("The error is not valid, need to investigate")
LOGGER.error(benchmark_report.error)
return


if __name__ == "__main__":
if CANONICAL_MODELS_ONLY:
models_attentions_weights = list(product(CANONICAL_MODELS_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys()))
print(f"Total number of canonical models experiments: {len(models_attentions_weights)}")
else:
models_attentions_weights = list(product(PRETRAINED_MODELS_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys()))
print(f"Total number of pretrained models experiments: {len(models_attentions_weights)}")
setup_logging(level="INFO", format_prefix="MAIN-PROCESS")

models_attentions_weights = list(
product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys())
)

LOGGER.info(
f"Running a total of {len(models_attentions_weights)} benchmarks, "
f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models, "
f"{len(ATTENTION_COFIGS)} attentions implementations"
f"and {len(WEIGHTS_CONFIGS)} weights configurations"
)

for model, attn_implementation, weights_config in models_attentions_weights:
benchmark_cuda_pytorch(model, attn_implementation, weights_config)
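
Note: the is_benchmark_conducted and errors_handler helpers imported above come from llm_perf/utils.py, which is not shown in this diff. Below is a minimal sketch of how a skip-if-already-conducted check could work against the Hub; the helper name, the benchmark_report.json file name, and the dataset repo type are assumptions rather than the actual implementation.

# Hypothetical sketch only: checks whether a report was already pushed to the
# Hub repo under the same subfolder. The file name ("benchmark_report.json")
# and repo_type="dataset" are assumptions about how push_to_hub stores artifacts.
from huggingface_hub import file_exists


def is_benchmark_conducted_sketch(push_repo_id: str, subfolder: str) -> bool:
    return file_exists(
        repo_id=push_repo_id,
        filename=f"{subfolder}/benchmark_report.json",
        repo_type="dataset",
    )
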
42 changes: 42 additions & 0 deletions llm_perf/update_open_llm_leaderboard.py
@@ -0,0 +1,42 @@
import subprocess

import pandas as pd
from huggingface_hub import create_repo, upload_file

scraping_script = """
git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
pip install -r scrape-open-llm-leaderboard/requirements.txt
python scrape-open-llm-leaderboard/main.py
rm -rf scrape-open-llm-leaderboard
"""


def run_scraper():
subprocess.run(scraping_script, shell=True)


def main():
run_scraper()

open_llm_leaderboard = pd.read_csv("open-llm-leaderboard.csv")

if len(open_llm_leaderboard) > 0:
create_repo(
repo_id="optimum-benchmark/open-llm-leaderboard",
repo_type="dataset",
exist_ok=True,
private=False,
)
upload_file(
repo_id="optimum-benchmark/open-llm-leaderboard",
commit_message="Update open LLM leaderboard",
path_or_fileobj="open-llm-leaderboard.csv",
path_in_repo="open-llm-leaderboard.csv",
repo_type="dataset",
)
else:
raise ValueError("No models found")


if __name__ == "__main__":
main()
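
Once this script has uploaded the CSV, the dataset can be read back for analysis. A small usage sketch follows; the repo id and file name are taken from the script above, while the download-and-read pattern is just one possible way to consume it.

# Usage sketch: pull the uploaded CSV back from the Hub and load it with pandas.
import pandas as pd
from huggingface_hub import hf_hub_download

csv_path = hf_hub_download(
    repo_id="optimum-benchmark/open-llm-leaderboard",
    filename="open-llm-leaderboard.csv",
    repo_type="dataset",
)
open_llm_leaderboard = pd.read_csv(csv_path)
print(open_llm_leaderboard.head())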