Commit: Update llm perf (#195)
IlyasMoutawwakil authored May 9, 2024
1 parent 2e77e02 commit d35829e
Showing 9 changed files with 311 additions and 119 deletions.
@@ -1,17 +1,16 @@
name: LLM Perf Benchmarks - CUDA PyTorch
name: Update LLM Perf Benchmarks - CUDA PyTorch

on:
workflow_dispatch:
schedule:
# Every day at 00:00 UTC
- cron: "0 0 * * *"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
build_image_and_run_cuda_pytorch:
build_image_and_run_benchmarks:
strategy:
fail-fast: false
matrix:
@@ -53,7 +52,7 @@ jobs:
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
pip install -U transformers huggingface_hub[hf_transfer]
pip install -e .[codecarbon]
python llm_perf/benchmark_cuda_pytorch.py
pip install -e .
python llm_perf/update_llm_perf_cuda_pytorch.py
33 changes: 33 additions & 0 deletions .github/workflows/update_open_llm_leaderboard.yaml
@@ -0,0 +1,33 @@
name: Update Open LLM Leaderboard

on:
workflow_dispatch:
schedule:
- cron: "0 0 * * *"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
update_open_llm_leaderboard:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Set up Python 3.10
uses: actions/setup-python@v3
with:
python-version: "3.10"

- name: Install requirements
run: |
pip install --upgrade pip
pip install pandas huggingface-hub
- name: Update Open LLM Leaderboard
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
python llm_perf/update_open_llm_leaderboard.py
4 changes: 2 additions & 2 deletions Makefile
@@ -173,9 +173,9 @@ test_cli_rocm_pytorch_single_gpu:
# llm-perf

install_llm_perf_cuda_pytorch:
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
pip install -U transformers huggingface_hub[hf_transfer]
pip install -e .[codecarbon]
pip install -e .

run_llm_perf_cuda_pytorch_unquantized:
SUBSET=unquantized python llm_perf/benchmark_cuda_pytorch.py
36 changes: 0 additions & 36 deletions llm_perf/constants.py

This file was deleted.

@@ -2,21 +2,32 @@
from itertools import product
from logging import getLogger

from llm_perf.constants import CANONICAL_MODELS_LIST, GENERATE_KWARGS, INPUT_SHAPES, PRETRAINED_MODELS_LIST
from llm_perf.utils import common_errors_reporter, is_experiment_conducted, is_experiment_not_supported
from optimum_benchmark.backends.pytorch.config import PyTorchConfig
from optimum_benchmark.benchmarks.inference.config import InferenceConfig
from optimum_benchmark.experiment import ExperimentConfig, launch
from optimum_benchmark.launchers.process.config import ProcessConfig
from llm_perf.utils import (
CANONICAL_PRETRAINED_OPEN_LLM_LIST,
GENERATE_KWARGS,
INPUT_SHAPES,
OPEN_LLM_LIST,
PRETRAINED_OPEN_LLM_LIST,
errors_handler,
is_benchmark_conducted,
is_benchmark_supported,
)
from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

CWD = os.getcwd()
MACHINE = os.getenv("MACHINE", "1xA100")
SUBSET = os.getenv("SUBSET", "unquantized")
CANONICAL_MODELS_ONLY = os.getenv("CANONICAL_MODELS_ONLY", "1") == "1"
PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
SUBSET = os.getenv("SUBSET", None)
MACHINE = os.getenv("MACHINE", None)


if os.getenv("MACHINE", None) is None and os.getenv("SUBSET", None) is None:
PUSH_REPO_ID = "optimum-benchmark/llm-perf-pytorch-cuda-debug"
CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"]
SUBSET = "unquantized"
elif os.getenv("MACHINE", None) is not None and os.getenv("SUBSET", None) is not None:
PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
else:
raise ValueError("Either both MACHINE and SUBSET should be set for benchmarking or neither for debugging")

ATTENTION_COFIGS = ["eager", "sdpa", "flash_attention_2"]
if SUBSET == "unquantized":
WEIGHTS_CONFIGS = {
@@ -79,25 +90,26 @@
}


setup_logging()
LOGGER = getLogger("llm-perf-backend")
LOGGER.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}")
LOGGER.info(f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}")
LOGGER.info(f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}")


def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
benchmark_name = f"{weights_config}-{attn_implementation}"
subfolder = f"{benchmark_name}/{model.replace('/', '--')}"

torch_dtype = WEIGHTS_CONFIGS[weights_config]["torch_dtype"]
quant_scheme = WEIGHTS_CONFIGS[weights_config]["quant_scheme"]
quant_config = WEIGHTS_CONFIGS[weights_config]["quant_config"]

if is_experiment_not_supported(torch_dtype, attn_implementation):
LOGGER.info(f"Skipping experiment with model {model} since it is not supported")
if not is_benchmark_supported(weights_config, attn_implementation):
LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported")
return

launcher_config = ProcessConfig(
start_method="spawn",
device_isolation=True,
device_isolation_action="error",
)
benchmark_config = InferenceConfig(
launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill")
scenario_config = InferenceConfig(
memory=True,
energy=True,
latency=True,
@@ -118,40 +130,53 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
quantization_scheme=quant_scheme,
quantization_config=quant_config,
attn_implementation=attn_implementation,
hub_kwargs={"trust_remote_code": True},
)

experiment_name = f"{weights_config}-{attn_implementation}"
subfolder = f"{experiment_name}/{model.replace('/', '--')}"

experiment_config = ExperimentConfig(
experiment_name=experiment_name,
benchmark=benchmark_config,
launcher=launcher_config,
backend=backend_config,
benchmark_config = BenchmarkConfig(
name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config
)

if is_experiment_conducted(experiment_config, PUSH_REPO_ID, subfolder):
LOGGER.info(f"Skipping experiment {experiment_name} with model {model} since it was already conducted")
if is_benchmark_conducted(benchmark_config, PUSH_REPO_ID, subfolder):
LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
return

experiment_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
benchmark_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)

try:
benchmark_report = launch(experiment_config)
LOGGER.info(f"Running benchmark {benchmark_name} with model {model}")
benchmark_report = Benchmark.launch(benchmark_config)
benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
benchmark.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)

except Exception as error:
os.chdir(CWD)  # TODO: figure out why this is happening
LOGGER.error(f"Experiment {experiment_name} failed with model {model}")
common_errors_reporter(error, LOGGER, subfolder, PUSH_REPO_ID)
LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}")
valid_error, benchmark_report = errors_handler(str(error))

if valid_error:
LOGGER.error("The error is a valid one, reporting it")
LOGGER.error(benchmark_report.error)
benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
else:
LOGGER.error("The error is not valid, need to investigate")
LOGGER.error(benchmark_report.error)
return


if __name__ == "__main__":
if CANONICAL_MODELS_ONLY:
models_attentions_weights = list(product(CANONICAL_MODELS_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys()))
print(f"Total number of canonical models experiments: {len(models_attentions_weights)}")
else:
models_attentions_weights = list(product(PRETRAINED_MODELS_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys()))
print(f"Total number of pretrained models experiments: {len(models_attentions_weights)}")
setup_logging(level="INFO", format_prefix="MAIN-PROCESS")

models_attentions_weights = list(
product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys())
)

LOGGER.info(
f"Running a total of {len(models_attentions_weights)} benchmarks, "
f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models, "
f"{len(ATTENTION_COFIGS)} attentions implementations"
f"and {len(WEIGHTS_CONFIGS)} weights configurations"
)

for model, attn_implementation, weights_config in models_attentions_weights:
benchmark_cuda_pytorch(model, attn_implementation, weights_config)
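
Note: the is_benchmark_conducted and errors_handler helpers imported above come from llm_perf/utils.py, which is not shown in this diff. Below is a minimal sketch of how a skip-if-already-conducted check could work against the Hub; the helper name, the benchmark_report.json file name, and the dataset repo type are assumptions rather than the actual implementation.

# Hypothetical sketch only: checks whether a report was already pushed to the
# Hub repo under the same subfolder. The file name ("benchmark_report.json")
# and repo_type="dataset" are assumptions about how push_to_hub stores artifacts.
from huggingface_hub import file_exists


def is_benchmark_conducted_sketch(push_repo_id: str, subfolder: str) -> bool:
    return file_exists(
        repo_id=push_repo_id,
        filename=f"{subfolder}/benchmark_report.json",
        repo_type="dataset",
    )
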
42 changes: 42 additions & 0 deletions llm_perf/update_open_llm_leaderboard.py
@@ -0,0 +1,42 @@
import subprocess

import pandas as pd
from huggingface_hub import create_repo, upload_file

scraping_script = """
git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
pip install -r scrape-open-llm-leaderboard/requirements.txt
python scrape-open-llm-leaderboard/main.py
rm -rf scrape-open-llm-leaderboard
"""


def run_scraper():
subprocess.run(scraping_script, shell=True)


def main():
run_scraper()

open_llm_leaderboard = pd.read_csv("open-llm-leaderboard.csv")

if len(open_llm_leaderboard) > 0:
create_repo(
repo_id="optimum-benchmark/open-llm-leaderboard",
repo_type="dataset",
exist_ok=True,
private=False,
)
upload_file(
repo_id="optimum-benchmark/open-llm-leaderboard",
commit_message="Update open LLM leaderboard",
path_or_fileobj="open-llm-leaderboard.csv",
path_in_repo="open-llm-leaderboard.csv",
repo_type="dataset",
)
else:
raise ValueError("No models found")


if __name__ == "__main__":
main()
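
Once this script has uploaded the CSV, the dataset can be read back for analysis. A small usage sketch follows; the repo id and file name are taken from the script above, while the download-and-read pattern is just one possible way to consume it.

# Usage sketch: pull the uploaded CSV back from the Hub and load it with pandas.
import pandas as pd
from huggingface_hub import hf_hub_download

csv_path = hf_hub_download(
    repo_id="optimum-benchmark/open-llm-leaderboard",
    filename="open-llm-leaderboard.csv",
    repo_type="dataset",
)
open_llm_leaderboard = pd.read_csv(csv_path)
print(open_llm_leaderboard.head())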