diff --git a/.github/workflows/llm_perf_cuda_pytorch.yaml b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
similarity index 85%
rename from .github/workflows/llm_perf_cuda_pytorch.yaml
rename to .github/workflows/update_llm_perf_cuda_pytorch.yaml
index 5b849f56..7fe2ef39 100644
--- a/.github/workflows/llm_perf_cuda_pytorch.yaml
+++ b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
@@ -1,9 +1,8 @@
-name: LLM Perf Benchmarks - CUDA PyTorch
+name: Update LLM Perf Benchmarks - CUDA PyTorch
 
 on:
   workflow_dispatch:
   schedule:
-    # Every day at 00:00 UTC
     - cron: "0 0 * * *"
 
 concurrency:
@@ -11,7 +10,7 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  build_image_and_run_cuda_pytorch:
+  build_image_and_run_benchmarks:
     strategy:
       fail-fast: false
       matrix:
@@ -53,7 +52,7 @@ jobs:
             --volume ${{ github.workspace }}:/workspace
             --workdir /workspace
           run: |
-            pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
+            pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
             pip install -U transformers huggingface_hub[hf_transfer]
-            pip install -e .[codecarbon]
-            python llm_perf/benchmark_cuda_pytorch.py
+            pip install -e .
+            python llm_perf/update_llm_perf_cuda_pytorch.py
diff --git a/.github/workflows/update_open_llm_leaderboard.yaml b/.github/workflows/update_open_llm_leaderboard.yaml
new file mode 100644
index 00000000..30fb40ca
--- /dev/null
+++ b/.github/workflows/update_open_llm_leaderboard.yaml
@@ -0,0 +1,33 @@
+name: Update Open LLM Leaderboard
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * *"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  update_open_llm_leaderboard:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v3
+        with:
+          python-version: "3.10"
+
+      - name: Install requirements
+        run: |
+          pip install --upgrade pip
+          pip install pandas huggingface-hub
+
+      - name: Update Open LLM Leaderboard
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          python llm_perf/update_open_llm_leaderboard.py
diff --git a/Makefile b/Makefile
index 2ab26568..b80d8144 100644
--- a/Makefile
+++ b/Makefile
@@ -173,9 +173,9 @@ test_cli_rocm_pytorch_single_gpu:
 
 # llm-perf
 install_llm_perf_cuda_pytorch:
-	pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq
+	pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
 	pip install -U transformers huggingface_hub[hf_transfer]
-	pip install -e .[codecarbon]
+	pip install -e .
 
 run_llm_perf_cuda_pytorch_unquantized:
 	SUBSET=unquantized python llm_perf/benchmark_cuda_pytorch.py
diff --git a/llm_perf/constants.py b/llm_perf/constants.py
deleted file mode 100644
index 13d4973d..00000000
--- a/llm_perf/constants.py
+++ /dev/null
@@ -1,36 +0,0 @@
-import pandas as pd
-
-INPUT_SHAPES = {"batch_size": 1, "sequence_length": 256}
-GENERATE_KWARGS = {"max_new_tokens": 64, "min_new_tokens": 64}
-
-OPEN_LLM_DATAFRAME = pd.read_csv("hf://datasets/optimum/llm-perf-dataset/open-llm.csv")
-PRETRAINED_MODELS_LIST = OPEN_LLM_DATAFRAME.sort_values("Size", ascending=True)["Model"].tolist()
-
-CANONICAL_ORGANIZATIONS = [
-    # big companies
-    *["google", "facebook", "meta", "meta-llama", "microsoft", "Intel", "TencentARC", "Salesforce"],
-    # collectives
-    *["EleutherAI", "tiiuae", "NousResearch", "Open-Orca"],
-    # HF related
-    ["bigcode", "HuggingFaceH4"],
-    # community members
-    ["teknium"],
-    # startups
-    *[
-        "mistral-community",
-        "openai-community",
-        "togethercomputer",
-        "stabilityai",
-        "CohereForAI",
-        "databricks",
-        "mistralai",
-        "internlm",
-        "Upstage",
-        "xai-org",
-        "Phind",
-        "01-ai",
-        "Deci",
-        "Qwen",
-    ],
-]
-CANONICAL_MODELS_LIST = [model for model in PRETRAINED_MODELS_LIST if model.split("/")[0] in CANONICAL_ORGANIZATIONS]
diff --git a/llm_perf/benchmark_cuda_pytorch.py b/llm_perf/update_llm_perf_cuda_pytorch.py
similarity index 51%
rename from llm_perf/benchmark_cuda_pytorch.py
rename to llm_perf/update_llm_perf_cuda_pytorch.py
index 64a40c60..02a79296 100644
--- a/llm_perf/benchmark_cuda_pytorch.py
+++ b/llm_perf/update_llm_perf_cuda_pytorch.py
@@ -2,21 +2,32 @@
 from itertools import product
 from logging import getLogger
 
-from llm_perf.constants import CANONICAL_MODELS_LIST, GENERATE_KWARGS, INPUT_SHAPES, PRETRAINED_MODELS_LIST
-from llm_perf.utils import common_errors_reporter, is_experiment_conducted, is_experiment_not_supported
-from optimum_benchmark.backends.pytorch.config import PyTorchConfig
-from optimum_benchmark.benchmarks.inference.config import InferenceConfig
-from optimum_benchmark.experiment import ExperimentConfig, launch
-from optimum_benchmark.launchers.process.config import ProcessConfig
+from llm_perf.utils import (
+    CANONICAL_PRETRAINED_OPEN_LLM_LIST,
+    GENERATE_KWARGS,
+    INPUT_SHAPES,
+    OPEN_LLM_LIST,
+    PRETRAINED_OPEN_LLM_LIST,
+    errors_handler,
+    is_benchmark_conducted,
+    is_benchmark_supported,
+)
+from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
 from optimum_benchmark.logging_utils import setup_logging
 
 
-CWD = os.getcwd()
-MACHINE = os.getenv("MACHINE", "1xA100")
-SUBSET = os.getenv("SUBSET", "unquantized")
-CANONICAL_MODELS_ONLY = os.getenv("CANONICAL_MODELS_ONLY", "1") == "1"
-PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
+SUBSET = os.getenv("SUBSET", None)
+MACHINE = os.getenv("MACHINE", None)
+if os.getenv("MACHINE", None) is None and os.getenv("SUBSET", None) is None:
+    PUSH_REPO_ID = "optimum-benchmark/llm-perf-pytorch-cuda-debug"
+    CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"]
+    SUBSET = "unquantized"
+elif os.getenv("MACHINE", None) is not None and os.getenv("SUBSET", None) is not None:
+    PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
+else:
+    raise ValueError("Either both MACHINE and SUBSET should be set for benchmarking or neither for debugging")
+
 
 ATTENTION_COFIGS = ["eager", "sdpa", "flash_attention_2"]
 if SUBSET == "unquantized":
     WEIGHTS_CONFIGS = {
@@ -79,25 +90,26 @@
     }
 
 
-setup_logging()
 LOGGER = getLogger("llm-perf-backend")
+LOGGER.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}")
+LOGGER.info(f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}")
+LOGGER.info(f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}")
 
 
 def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
+    benchmark_name = f"{weights_config}-{attn_implementation}"
+    subfolder = f"{benchmark_name}/{model.replace('/', '--')}"
+
     torch_dtype = WEIGHTS_CONFIGS[weights_config]["torch_dtype"]
     quant_scheme = WEIGHTS_CONFIGS[weights_config]["quant_scheme"]
     quant_config = WEIGHTS_CONFIGS[weights_config]["quant_config"]
 
-    if is_experiment_not_supported(torch_dtype, attn_implementation):
-        LOGGER.info(f"Skipping experiment with model {model} since it is not supported")
+    if not is_benchmark_supported(weights_config, attn_implementation):
+        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported")
         return
 
-    launcher_config = ProcessConfig(
-        start_method="spawn",
-        device_isolation=True,
-        device_isolation_action="error",
-    )
-    benchmark_config = InferenceConfig(
+    launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill")
+    scenario_config = InferenceConfig(
         memory=True,
         energy=True,
         latency=True,
@@ -118,40 +130,53 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
         quantization_scheme=quant_scheme,
         quantization_config=quant_config,
         attn_implementation=attn_implementation,
+        hub_kwargs={"trust_remote_code": True},
     )
 
-    experiment_name = f"{weights_config}-{attn_implementation}"
-    subfolder = f"{experiment_name}/{model.replace('/', '--')}"
-
-    experiment_config = ExperimentConfig(
-        experiment_name=experiment_name,
-        benchmark=benchmark_config,
-        launcher=launcher_config,
-        backend=backend_config,
+    benchmark_config = BenchmarkConfig(
+        name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config
     )
 
-    if is_experiment_conducted(experiment_config, PUSH_REPO_ID, subfolder):
-        LOGGER.info(f"Skipping experiment {experiment_name} with model {model} since it was already conducted")
+    if is_benchmark_conducted(benchmark_config, PUSH_REPO_ID, subfolder):
+        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
         return
 
-    experiment_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+    benchmark_config.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
     try:
-        benchmark_report = launch(experiment_config)
+        LOGGER.info(f"Running benchmark {benchmark_name} with model {model}")
+        benchmark_report = Benchmark.launch(benchmark_config)
         benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+        benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
+        benchmark.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+
     except Exception as error:
-        os.chdir(CWD)  # TODO: figure our why this is happening
-        LOGGER.error(f"Experiment {experiment_name} failed with model {model}")
-        common_errors_reporter(error, LOGGER, subfolder, PUSH_REPO_ID)
+        LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}")
+        valid_error, benchmark_report = errors_handler(str(error))
+
+        if valid_error:
+            LOGGER.error("The error is a valid one, reporting it")
+            LOGGER.error(benchmark_report.error)
+            benchmark_report.push_to_hub(subfolder=subfolder, repo_id=PUSH_REPO_ID, private=True)
+        else:
+            LOGGER.error("The error is not valid, need to investigate")
+            LOGGER.error(benchmark_report.error)
+
     return
 
 
 if __name__ == "__main__":
-    if CANONICAL_MODELS_ONLY:
-        models_attentions_weights = list(product(CANONICAL_MODELS_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys()))
-        print(f"Total number of canonical models experiments: {len(models_attentions_weights)}")
-    else:
-        models_attentions_weights = list(product(PRETRAINED_MODELS_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys()))
-        print(f"Total number of pretrained models experiments: {len(models_attentions_weights)}")
+    setup_logging(level="INFO", format_prefix="MAIN-PROCESS")
+
+    models_attentions_weights = list(
+        product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_COFIGS, WEIGHTS_CONFIGS.keys())
+    )
+
+    LOGGER.info(
+        f"Running a total of {len(models_attentions_weights)} benchmarks, "
+        f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models, "
+        f"{len(ATTENTION_COFIGS)} attentions implementations"
+        f"and {len(WEIGHTS_CONFIGS)} weights configurations"
+    )
 
     for model, attn_implementation, weights_config in models_attentions_weights:
         benchmark_cuda_pytorch(model, attn_implementation, weights_config)
diff --git a/llm_perf/update_open_llm_leaderboard.py b/llm_perf/update_open_llm_leaderboard.py
new file mode 100644
index 00000000..0ea0827e
--- /dev/null
+++ b/llm_perf/update_open_llm_leaderboard.py
@@ -0,0 +1,42 @@
+import subprocess
+
+import pandas as pd
+from huggingface_hub import create_repo, upload_file
+
+scrapping_script = """
+git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
+pip install -r scrape-open-llm-leaderboard/requirements.txt
+python scrape-open-llm-leaderboard/main.py
+rm -rf scrape-open-llm-leaderboard
+"""
+
+
+def run_scrapper():
+    subprocess.run(scrapping_script, shell=True)
+
+
+def main():
+    run_scrapper()
+
+    open_llm_leaderboard = pd.read_csv("open-llm-leaderboard.csv")
+
+    if len(open_llm_leaderboard) > 0:
+        create_repo(
+            repo_id="optimum-benchmark/open-llm-leaderboard",
+            repo_type="dataset",
+            exist_ok=True,
+            private=False,
+        )
+        upload_file(
+            repo_id="optimum-benchmark/open-llm-leaderboard",
+            commit_message="Update open LLM leaderboard",
+            path_or_fileobj="open-llm-leaderboard.csv",
+            path_in_repo="open-llm-leaderboard.csv",
+            repo_type="dataset",
+        )
+    else:
+        raise ValueError("No models found")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/llm_perf/utils.py b/llm_perf/utils.py
index 2e1dfbbc..071b66b2 100644
--- a/llm_perf/utils.py
+++ b/llm_perf/utils.py
@@ -1,41 +1,170 @@
-from optimum_benchmark.benchmarks.report import BenchmarkReport
+from typing import Tuple
 
+import pandas as pd
 
-def common_errors_reporter(error, logger, subfolder, push_repo_id):
-    benchmark_report = BenchmarkReport.from_targets(["decode", "prefill", "per_token", "error"])
+from optimum_benchmark.report import BenchmarkReport
 
-    if "torch.cuda.OutOfMemoryError" in str(error):
-        logger.error("CUDA: Out of memory")
+OPEN_LLM_LEADERBOARD = pd.read_csv("hf://datasets/optimum-benchmark/open-llm-leaderboard/open-llm-leaderboard.csv")
+
+
+INPUT_SHAPES = {"batch_size": 1, "sequence_length": 256}
+GENERATE_KWARGS = {"max_new_tokens": 64, "min_new_tokens": 64}
+
+
+CANONICAL_ORGANIZATIONS = [
+    # big companies
+    *["google", "facebook", "meta", "meta-llama", "microsoft", "Intel", "TencentARC", "Salesforce"],
+    # collectives
+    *["EleutherAI", "tiiuae", "NousResearch", "Open-Orca"],
+    # HF related
+    ["bigcode", "HuggingFaceH4", "huggyllama"],
+    # community members
+    ["teknium"],
+    # startups
+    *[
+        "mistral-community",
+        "openai-community",
+        "togethercomputer",
+ "stabilityai", + "CohereForAI", + "databricks", + "mistralai", + "internlm", + "Upstage", + "xai-org", + "Phind", + "01-ai", + "Deci", + "Qwen", + ], +] + + +OPEN_LLM_LIST = OPEN_LLM_LEADERBOARD.drop_duplicates(subset=["Model"])["Model"].tolist() +PRETRAINED_OPEN_LLM_LIST = ( + OPEN_LLM_LEADERBOARD[OPEN_LLM_LEADERBOARD["Type"] == "pretrained"] + .drop_duplicates(subset=["Model"])["Model"] + .tolist() +) +# CANONICAL_PRETRAINED_OPEN_LLM_LIST = sorted( +# [model for model in PRETRAINED_OPEN_LLM_LIST if model.split("/")[0] in CANONICAL_ORGANIZATIONS] +# ) + +CANONICAL_PRETRAINED_OPEN_LLM_LIST = [ + "01-ai/Yi-34B", + "01-ai/Yi-6B", + "Deci/DeciCoder-1b", + "Deci/DeciLM-7B", + "EleutherAI/gpt-j-6b", + "EleutherAI/gpt-neo-1.3B", + "EleutherAI/gpt-neo-125m", + "EleutherAI/gpt-neo-2.7B", + "EleutherAI/gpt-neox-20b", + "EleutherAI/polyglot-ko-12.8b", + "EleutherAI/pythia-1.3b", + "EleutherAI/pythia-1.4b", + # "EleutherAI/pythia-1.4b-deduped", + "EleutherAI/pythia-12b", + # "EleutherAI/pythia-12b-deduped", + "EleutherAI/pythia-160m", + # "EleutherAI/pythia-160m-deduped", + # "EleutherAI/pythia-1b-deduped", + "EleutherAI/pythia-2.7b", + # "EleutherAI/pythia-2.8b-deduped", + "EleutherAI/pythia-410m", + # "EleutherAI/pythia-410m-deduped", + "EleutherAI/pythia-6.7b", + # "EleutherAI/pythia-6.9b-deduped", + "EleutherAI/pythia-70m", + # "EleutherAI/pythia-70m-deduped", + "Qwen/Qwen-14B", + "Qwen/Qwen-72B", + "Qwen/Qwen-7B", + "Qwen/Qwen1.5-0.5B", + "Qwen/Qwen1.5-1.8B", + "Qwen/Qwen1.5-110B", + "Qwen/Qwen1.5-14B", + "Qwen/Qwen1.5-32B", + "Qwen/Qwen1.5-4B", + "Qwen/Qwen1.5-72B", + "Qwen/Qwen1.5-7B", + # "Qwen/Qwen1.5-7B-Chat", + "Qwen/Qwen1.5-MoE-A2.7B", + "Qwen/Qwen2-beta-14B", + "Qwen/Qwen2-beta-72B", + "Salesforce/codegen-16B-nl", + # "Salesforce/codegen-6B-multi", + "Salesforce/codegen-6B-nl", + "TencentARC/Mistral_Pro_8B_v0.1", + "databricks/dbrx-base", + "facebook/opt-125m", + "facebook/opt-13b", + "facebook/opt-2.7b", + "facebook/opt-30b", + "facebook/opt-350m", + "facebook/opt-6.7b", + "facebook/opt-66b", + "facebook/xglm-4.5B", + "facebook/xglm-564M", + "facebook/xglm-7.5B", + "google/gemma-7b", + "google/recurrentgemma-2b", + "internlm/internlm-20b", + "internlm/internlm2-20b", + "meta-llama/Llama-2-13b-hf", + "meta-llama/Llama-2-7b-hf", + "meta-llama/Meta-Llama-3-8B", + "meta-llama/Meta-Llama-3-70B", + "microsoft/phi-1_5", + "microsoft/rho-math-1b-v0.1", + "mistralai/Mistral-7B-v0.1", + "mistralai/Mixtral-8x22B-v0.1", + "mistralai/Mixtral-8x7B-v0.1", + "openai-community/gpt2", + "openai-community/gpt2-large", + "stabilityai/stablelm-2-12b", + "stabilityai/stablelm-2-1_6b", + "stabilityai/stablelm-3b-4e1t", + "stabilityai/stablelm-base-alpha-3b", + "stabilityai/stablelm-base-alpha-7b", + # "stabilityai/stablelm-base-alpha-7b-v2", + "tiiuae/falcon-180B", + "tiiuae/falcon-40b", + "tiiuae/falcon-7b", + "tiiuae/falcon-rw-1b", + # "togethercomputer/RedPajama-INCITE-7B-Base", + "togethercomputer/RedPajama-INCITE-Base-3B-v1", + "togethercomputer/RedPajama-INCITE-Base-7B-v0.1", +] + + +def errors_handler(error: str) -> Tuple[bool, BenchmarkReport]: + valid_error = True + benchmark_report = BenchmarkReport.from_list(["error"]) + + if "torch.cuda.OutOfMemoryError" in error: benchmark_report.error = "CUDA: Out of memory" - benchmark_report.push_to_hub(subfolder=subfolder, repo_id=push_repo_id, private=True) - elif "gptq" in str(error) and "assert outfeatures % 32 == 0" in str(error): - logger.error("GPTQ: assert outfeatures % 32 == 0") + elif "gptq" in error and "assert outfeatures % 32 == 0" in error: 
benchmark_report.error = "GPTQ: assert outfeatures % 32 == 0" - benchmark_report.push_to_hub(subfolder=subfolder, repo_id=push_repo_id, private=True) - elif "gptq" in str(error) and "assert infeatures % self.group_size == 0" in str(error): - logger.error("GPTQ: assert infeatures % self.group_size == 0") + elif "gptq" in error and "assert infeatures % self.group_size == 0" in error: benchmark_report.error = "GPTQ: assert infeatures % self.group_size == 0" - benchmark_report.push_to_hub(subfolder=subfolder, repo_id=push_repo_id, private=True) - elif "support Flash Attention 2.0" in str(error): - logger.error("Flash Attention 2.0: not supported yet") + elif "support Flash Attention 2.0" in error: benchmark_report.error = "Flash Attention 2.0: not supported yet" - benchmark_report.push_to_hub(subfolder=subfolder, repo_id=push_repo_id, private=True) - elif "support an attention implementation through torch.nn.functional.scaled_dot_product_attention" in str(error): - logger.error("SDPA: not supported yet") + elif "support an attention implementation through torch.nn.functional.scaled_dot_product_attention" in error: benchmark_report.error = "SDPA: not supported yet" - benchmark_report.push_to_hub(subfolder=subfolder, repo_id=push_repo_id, private=True) - elif "FlashAttention only support fp16 and bf16 data type" in str(error): - logger.error("FlashAttention: only support fp16 and bf16 data type") + elif "FlashAttention only support fp16 and bf16 data type" in error: benchmark_report.error = "FlashAttention: only support fp16 and bf16 data type" - benchmark_report.push_to_hub(subfolder=subfolder, repo_id=push_repo_id, private=True) else: - logger.error(f"Unknown error: {error}") + benchmark_report.error = f"Unknown error: {error}" + valid_error = False + + return valid_error, benchmark_report -def is_experiment_conducted(experiment_config, push_repo_id, subfolder): +def is_benchmark_conducted(benchmark_config, push_repo_id, subfolder): try: - loaded_experiment_config = experiment_config.from_pretrained(repo_id=push_repo_id, subfolder=subfolder) - if loaded_experiment_config.to_dict() == experiment_config.to_dict(): + loaded_benchmark_config = benchmark_config.from_pretrained(repo_id=push_repo_id, subfolder=subfolder) + if loaded_benchmark_config.to_dict() == benchmark_config.to_dict(): BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder) return True except Exception: @@ -44,8 +173,8 @@ def is_experiment_conducted(experiment_config, push_repo_id, subfolder): return False -def is_experiment_not_supported(torch_dtype, attn_implementation): - if attn_implementation == "flash_attention_2" and torch_dtype == "float32": - return True +def is_benchmark_supported(weights_config, attn_implementation): + if attn_implementation == "flash_attention_2" and weights_config == "float32": + return False - return False + return True diff --git a/optimum_benchmark/base.py b/optimum_benchmark/base.py index eb47a0f5..2188716c 100644 --- a/optimum_benchmark/base.py +++ b/optimum_benchmark/base.py @@ -7,7 +7,7 @@ from .backends.base import Backend from .backends.config import BackendConfig from .config import BenchmarkConfig -from .hub_utils import PushToHubMixin +from .hub_utils import PushToHubMixin, classproperty from .launchers import LauncherConfig from .launchers.base import Launcher from .report import BenchmarkReport @@ -75,3 +75,7 @@ def run(cls, config: BenchmarkConfig): backend.cleanup() return report + + @classproperty + def default_filename(cls) -> str: + return "benchmark.json" diff 
--git a/optimum_benchmark/hub_utils.py b/optimum_benchmark/hub_utils.py index 5a2dd9f2..afc5b100 100644 --- a/optimum_benchmark/hub_utils.py +++ b/optimum_benchmark/hub_utils.py @@ -2,7 +2,6 @@ import tempfile from dataclasses import asdict, dataclass from json import dump, load -from logging import getLogger from typing import Any, Dict, Optional import pandas as pd @@ -10,8 +9,6 @@ from huggingface_hub import create_repo, hf_hub_download, upload_file from typing_extensions import Self -LOGGER = getLogger(__name__) - class classproperty: def __init__(self, fget): @@ -97,7 +94,6 @@ def push_to_hub( path_in_repo = os.path.join(subfolder, filename) self.save_json(path_or_fileobj) - LOGGER.info(f"Pushing {path_or_fileobj} to {repo_id}:{path_in_repo}") upload_file( repo_id=repo_id, path_in_repo=path_in_repo,