Update llm perf #194

Closed (wants to merge 1 commit)
Changes from all commits
@@ -1,9 +1,11 @@
name: LLM Perf Benchmarks - CUDA PyTorch
name: Update LLM Perf Benchmarks - CUDA PyTorch

on:
workflow_dispatch:
push:
branches:
- update-llm-perf
schedule:
# Every day at 00:00 UTC
- cron: "0 0 * * *"

concurrency:
@@ -53,7 +55,7 @@ jobs:
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
pip install -U transformers huggingface_hub[hf_transfer]
pip install -e .[codecarbon]
pip install -e .
python llm_perf/benchmark_cuda_pytorch.py
36 changes: 36 additions & 0 deletions .github/workflows/update_open_llm_leaderboard.yaml
@@ -0,0 +1,36 @@
name: Update Open LLM Leaderboard

on:
  workflow_dispatch:
  push:
    branches:
      - update-llm-perf
  schedule:
    - cron: "0 0 * * *"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  update_open_llm_leaderboard:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"

      - name: Install requirements
        run: |
          pip install --upgrade pip
          pip install pandas huggingface-hub

      - name: Update Open LLM Leaderboard
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          python scripts/update_open_llm_leaderboard.py
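The last step exports the HF_TOKEN secret as an environment variable for the update script; recent versions of huggingface_hub can pick it up from the environment, but it can also be passed explicitly. A hypothetical sketch of the explicit variant, reusing the repo and file names from the update script added further down (illustrative only, not what the script itself does):

import os

from huggingface_hub import HfApi

# HF_TOKEN is the secret exported by the workflow step above
api = HfApi(token=os.environ["HF_TOKEN"])
api.upload_file(
    repo_id="optimum-benchmark/open-llm-leaderboard",  # dataset repo used by the update script
    path_or_fileobj="open-llm-leaderboard.csv",
    path_in_repo="open-llm-leaderboard.csv",
    repo_type="dataset",
    commit_message="Update open LLM leaderboard",
)
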
4 changes: 2 additions & 2 deletions Makefile
@@ -173,9 +173,9 @@ test_cli_rocm_pytorch_single_gpu:
# llm-perf

install_llm_perf_cuda_pytorch:
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
pip install -U transformers huggingface_hub[hf_transfer]
pip install -e .[codecarbon]
pip install -e .

run_llm_perf_cuda_pytorch_unquantized:
SUBSET=unquantized python llm_perf/benchmark_cuda_pytorch.py
36 changes: 0 additions & 36 deletions llm_perf/constants.py

This file was deleted.

llm_perf/benchmark_cuda_pytorch.py
@@ -2,21 +2,24 @@
from itertools import product
from logging import getLogger

from llm_perf.constants import CANONICAL_MODELS_LIST, GENERATE_KWARGS, INPUT_SHAPES, PRETRAINED_MODELS_LIST
from llm_perf.utils import common_errors_reporter, is_experiment_conducted, is_experiment_not_supported
from optimum_benchmark.backends.pytorch.config import PyTorchConfig
from optimum_benchmark.benchmarks.inference.config import InferenceConfig
from optimum_benchmark.experiment import ExperimentConfig, launch
from optimum_benchmark.launchers.process.config import ProcessConfig
from llm_perf.utils import (
CANONICAL_PRETRAINED_OPEN_LLM_LIST,
GENERATE_KWARGS,
INPUT_SHAPES,
OPEN_LLM_LIST,
PRETRAINED_OPEN_LLM_LIST,
errors_reporter,
is_benchmark_conducted,
is_benchmark_supported,
)
from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

CWD = os.getcwd()
MACHINE = os.getenv("MACHINE", "1xA100")
SUBSET = os.getenv("SUBSET", "unquantized")
CANONICAL_MODELS_ONLY = os.getenv("CANONICAL_MODELS_ONLY", "1") == "1"
PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"


ATTENTION_COFIGS = ["eager", "sdpa", "flash_attention_2"]
if SUBSET == "unquantized":
WEIGHTS_CONFIGS = {
@@ -79,25 +82,26 @@
}


setup_logging()
LOGGER = getLogger("llm-perf-backend")
LOGGER.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}")
LOGGER.info(f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}")
LOGGER.info(f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}")


def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
benchmark_name = f"{weights_config}-{attn_implementation}"
subfolder = f"{benchmark_name}/{model.replace('/', '--')}"

torch_dtype = WEIGHTS_CONFIGS[weights_config]["torch_dtype"]
quant_scheme = WEIGHTS_CONFIGS[weights_config]["quant_scheme"]
quant_config = WEIGHTS_CONFIGS[weights_config]["quant_config"]

if is_experiment_not_supported(torch_dtype, attn_implementation):
LOGGER.info(f"Skipping experiment with model {model} since it is not supported")
if not is_benchmark_supported(weights_config, attn_implementation):
LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported")
return

launcher_config = ProcessConfig(
start_method="spawn",
device_isolation=True,
device_isolation_action="error",
)
benchmark_config = InferenceConfig(
launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill")
scenario_config = InferenceConfig(
memory=True,
energy=True,
latency=True,
@@ -110,7 +114,7 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
backend_config = PyTorchConfig(
model=model,
device="cuda",
device_ids="0",
device_ids="4",
no_weights=True,
library="transformers",
task="text-generation",
@@ -120,38 +124,41 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
attn_implementation=attn_implementation,
)

experiment_name = f"{weights_config}-{attn_implementation}"
subfolder = f"{experiment_name}/{model.replace('/', '--')}"

experiment_config = ExperimentConfig(
experiment_name=experiment_name,
benchmark=benchmark_config,
launcher=launcher_config,
backend=backend_config,
benchmark_config = BenchmarkConfig(
name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config
)

if is_experiment_conducted(experiment_config, PUSH_REPO_ID, subfolder):
LOGGER.info(f"Skipping experiment {experiment_name} with model {model} since it was already conducted")
if is_benchmark_conducted(benchmark_config, PUSH_REPO_ID, subfolder):
LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
return

experiment_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
benchmark_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)

try:
benchmark_report = launch(experiment_config)
LOGGER.info(f"Running benchmark {benchmark_name} with model {model}")
benchmark_report = Benchmark.launch(benchmark_config)
benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
except Exception as error:
os.chdir(CWD) # TODO: figure our why this is happening
LOGGER.error(f"Experiment {experiment_name} failed with model {model}")
common_errors_reporter(error, LOGGER, subfolder, PUSH_REPO_ID)
LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}")
valid_error, benchmark_report = errors_reporter(error)
LOGGER.error(benchmark_report.error, exc_info=True)
if valid_error:
benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)


if __name__ == "__main__":
if CANONICAL_MODELS_ONLY:
models_attentions_weights = list(product(CANONICAL_MODELS_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys()))
print(f"Total number of canonical models experiments: {len(models_attentions_weights)}")
else:
models_attentions_weights = list(product(PRETRAINED_MODELS_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys()))
print(f"Total number of pretrained models experiments: {len(models_attentions_weights)}")
setup_logging(level="INFO", format_prefix="MAIN-PROCESS")

models_attentions_weights = list(
product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys())
)

LOGGER.info(
f"Running a total of {len(models_attentions_weights)} benchmarks, "
f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models, "
f"{len(ATTENTION_COFIGS)} attentions implementations"
f"and {len(WEIGHTS_CONFIGS)} weights configurations"
)

for model, attn_implementation, weights_config in models_attentions_weights:
benchmark_cuda_pytorch(model, attn_implementation, weights_config)
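For reference, the new optimum_benchmark entry points used above compose as follows outside the sweep loop. This is a minimal sketch: the checkpoint, dtype, attention implementation, and the input_shapes/generate_kwargs values are placeholder assumptions, since the script's real INPUT_SHAPES and GENERATE_KWARGS come from llm_perf/utils.py, which is not shown in this diff.

# Minimal sketch of the BenchmarkConfig / Benchmark.launch pattern used above,
# stripped of the sweep, the quantization configs, and the Hub pushes.
from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

setup_logging(level="INFO", format_prefix="MAIN-PROCESS")

launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill")
scenario_config = InferenceConfig(
    memory=True,
    energy=True,
    latency=True,
    # placeholder shapes and generation settings; the script takes these from llm_perf.utils
    input_shapes={"batch_size": 1, "sequence_length": 256},
    generate_kwargs={"max_new_tokens": 64, "min_new_tokens": 64},
)
backend_config = PyTorchConfig(
    model="gpt2",  # placeholder checkpoint; the script sweeps CANONICAL_PRETRAINED_OPEN_LLM_LIST
    device="cuda",
    device_ids="0",
    no_weights=True,
    library="transformers",
    task="text-generation",
    torch_dtype="float16",
    attn_implementation="sdpa",
)

benchmark_config = BenchmarkConfig(
    name="float16-sdpa",
    scenario=scenario_config,
    launcher=launcher_config,
    backend=backend_config,
)

benchmark_report = Benchmark.launch(benchmark_config)
print(benchmark_report)  # the script pushes both the config and the report to the Hub instead
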
42 changes: 42 additions & 0 deletions llm_perf/update_open_llm_leaderboard.py
@@ -0,0 +1,42 @@
import subprocess

import pandas as pd
from huggingface_hub import create_repo, upload_file

scrapping_script = """
git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
pip install -r scrape-open-llm-leaderboard/requirements.txt
python scrape-open-llm-leaderboard/main.py
rm -rf scrape-open-llm-leaderboard
"""


def run_scrapper():
    subprocess.run(scrapping_script, shell=True)


def main():
    run_scrapper()

    open_llm_leaderboard = pd.read_csv("open-llm-leaderboard.csv")

    if len(open_llm_leaderboard) > 0:
        create_repo(
            repo_id="optimum-benchmark/open-llm-leaderboard",
            repo_type="dataset",
            exist_ok=True,
            private=False,
        )
        upload_file(
            repo_id="optimum-benchmark/open-llm-leaderboard",
            commit_message="Update open LLM leaderboard",
            path_or_fileobj="open-llm-leaderboard.csv",
            path_in_repo="open-llm-leaderboard.csv",
            repo_type="dataset",
        )
    else:
        raise ValueError("No models found")


if __name__ == "__main__":
    main()
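After a successful run, the uploaded CSV can be read back from the dataset repo for a quick sanity check. A short sketch using only names that appear in the script above (the CSV's columns are not shown here, so none are assumed):

import pandas as pd
from huggingface_hub import hf_hub_download

# download the file uploaded by main() from the dataset repo
csv_path = hf_hub_download(
    repo_id="optimum-benchmark/open-llm-leaderboard",
    filename="open-llm-leaderboard.csv",
    repo_type="dataset",
)
leaderboard = pd.read_csv(csv_path)
print(f"{len(leaderboard)} rows in the scraped leaderboard")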