From 30a44728ab9fa6a29862039cc48e75913bd5b797 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 26 Nov 2024 15:17:41 +0100
Subject: [PATCH 01/16] fix llamacpp and windows libuv

---
 optimum_benchmark/launchers/torchrun/launcher.py | 10 ++++++----
 tests/configs/_gguf_.yaml | 4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py
index 98c076ee..b24c1c79 100644
--- a/optimum_benchmark/launchers/torchrun/launcher.py
+++ b/optimum_benchmark/launchers/torchrun/launcher.py
@@ -24,6 +24,10 @@ class TorchrunLauncher(Launcher[TorchrunConfig]):
     def __init__(self, config: TorchrunConfig):
         super().__init__(config)

+        if sys.platform == "win32":
+            self.logger.info("\t+ Disabline libuv on Windows")
+            os.environ["USE_LIBUV"] = "0"
+
         if get_start_method(allow_none=True) != self.config.start_method:
             self.logger.info(f"\t+ Setting multiprocessing start method to {self.config.start_method}")
             set_start_method(self.config.start_method, force=True)
@@ -101,7 +105,7 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])
             raise RuntimeError(f"Received an unexpected response from isolated process: {output}")

         self.logger.info("\t+ Aggregating reports from all rank processes")
-        report = BenchmarkReport.aggregate(reports)
+        report = BenchmarkReport.aggregate_across_processes(reports)

         return report

@@ -155,9 +159,7 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
     else:
         setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}")

-    if sys.platform == "win32":
-        logger.info("\t+ Disabline libuv on Windows")
-        os.environ["USE_LIBUV"] = "0"
+

     if torch.cuda.is_available():
         logger.info(f"\t+ Setting torch.distributed cuda device to {rank}")
diff --git a/tests/configs/_gguf_.yaml b/tests/configs/_gguf_.yaml
index 007a03e7..41ef8027 100644
--- a/tests/configs/_gguf_.yaml
+++ b/tests/configs/_gguf_.yaml
@@ -2,6 +2,6 @@ hydra:
   mode: MULTIRUN
   sweeper:
     params:
+      backend.model: ggml-org/models
      backend.task: text-generation,feature-extraction
-      backend.model: QuantFactory/gpt2-GGUF
-      backend.filename: gpt2.Q4_0.gguf
+      backend.filename: tinyllamas/stories15M-q8_0.gguf

From f4ee5b4b7e383259a9675b890de8320d8111c5d4 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 26 Nov 2024 15:19:54 +0100
Subject: [PATCH 02/16] fix

---
 optimum_benchmark/launchers/torchrun/launcher.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py
index b24c1c79..98eb4a37 100644
--- a/optimum_benchmark/launchers/torchrun/launcher.py
+++ b/optimum_benchmark/launchers/torchrun/launcher.py
@@ -105,7 +105,7 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])
             raise RuntimeError(f"Received an unexpected response from isolated process: {output}")

         self.logger.info("\t+ Aggregating reports from all rank processes")
-        report = BenchmarkReport.aggregate_across_processes(reports)
+        report = BenchmarkReport.aggregate(reports)

         return report

@@ -159,8 +159,6 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
     else:
         setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}")

-
-
     if torch.cuda.is_available():
         logger.info(f"\t+ Setting torch.distributed cuda device to {rank}")
         device = torch.device("cuda", rank)
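For context, the Windows workaround that these patches iterate on (the USE_LIBUV=0 environment variable in PATCH 01 above, an explicit init_method in PATCH 03 below, and finally skipping torchrun on Windows in PATCH 05) boils down to the sketch below. It is an illustration only, not part of the patch series, and it assumes a torchrun-managed environment where MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE are already set:

import sys

import torch.distributed

if sys.platform == "win32":
    # the libuv-backed TCPStore is the source of the Windows failures these patches work around;
    # the query parameter below is the init_method counterpart of setting USE_LIBUV=0 in the environment
    init_method = "env://?use_libuv=0"
else:
    init_method = "env://"

# env:// resolves the rendezvous from the environment variables exported by torchrun
torch.distributed.init_process_group(init_method=init_method)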
From 7c7d729f42fabf75d4a5aff54fa8f925954ac987 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 26 Nov 2024 15:34:06 +0100
Subject: [PATCH 03/16] fix

---
 optimum_benchmark/backends/base.py | 7 ++-----
 optimum_benchmark/backends/llama_cpp/backend.py | 9 ++-------
 optimum_benchmark/launchers/torchrun/launcher.py | 12 +++++++-----
 3 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py
index 6726f91f..1c039163 100644
--- a/optimum_benchmark/backends/base.py
+++ b/optimum_benchmark/backends/base.py
@@ -70,14 +70,11 @@ def __init__(self, config: BackendConfigT):

         elif self.config.library == "llama_cpp":
             self.logger.info("\t+ Benchmarking a LlamaCpp model")
-            # TOD: need a custom method to extract shapes from gguf
-            self.model_shapes = extract_transformers_shapes_from_artifacts(
-                self.pretrained_config, self.pretrained_processor
-            )
             self.pretrained_processor = None
-            self.generation_config = None
             self.pretrained_config = None
+            self.generation_config = None
             self.automodel_loader = None
+            self.model_shapes = {}

         else:
             self.logger.info("\t+ Benchmarking a Transformers model")
diff --git a/optimum_benchmark/backends/llama_cpp/backend.py b/optimum_benchmark/backends/llama_cpp/backend.py
index 06215cbf..c9d6bbf8 100644
--- a/optimum_benchmark/backends/llama_cpp/backend.py
+++ b/optimum_benchmark/backends/llama_cpp/backend.py
@@ -41,15 +41,10 @@ def llama_cpp_kwargs(self) -> Dict[str, Any]:
             "echo": False,
         }

-    def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]:
-        if self.config.task == "text-generation":
-            if input_shapes["batch_size"] != 1:
-                raise ValueError("Batch size must be 1 for LlamaCpp text generation")
-
-        return input_shapes
-
     def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         if self.config.task == "text-generation":
+            if inputs["input_ids"].shape[0] != 1:
+                raise ValueError("Batch size must be 1 for LlamaCpp text generation")
             return {"tokens": inputs["input_ids"].squeeze(0).tolist()}

         elif self.config.task == "feature-extraction":
diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py
index 98eb4a37..768ea1c7 100644
--- a/optimum_benchmark/launchers/torchrun/launcher.py
+++ b/optimum_benchmark/launchers/torchrun/launcher.py
@@ -24,10 +24,6 @@ class TorchrunLauncher(Launcher[TorchrunConfig]):
     def __init__(self, config: TorchrunConfig):
         super().__init__(config)

-        if sys.platform == "win32":
-            self.logger.info("\t+ Disabline libuv on Windows")
-            os.environ["USE_LIBUV"] = "0"
-
         if get_start_method(allow_none=True) != self.config.start_method:
             self.logger.info(f"\t+ Setting multiprocessing start method to {self.config.start_method}")
             set_start_method(self.config.start_method, force=True)
@@ -164,8 +160,14 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
         device = torch.device("cuda", rank)
         torch.cuda.set_device(device)

+    if sys.platform == "win32":
+        logger.info("\t+ Disabling libuv for Windows")
+        init_method = "env://?use_libuv=0"
+    else:
+        init_method = "env://"
+
     logger.info("\t+ Initializing torch.distributed process group")
-    torch.distributed.init_process_group()
+    torch.distributed.init_process_group(init_method=init_method)

     try:
         report = worker(*worker_args)

From 2e97648d43157c7e746c403a230aa528ac8cdefa Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 26 Nov 2024 15:50:32 +0100
Subject: [PATCH 04/16] double

---
 optimum_benchmark/launchers/torchrun/launcher.py | 16 +++++++++-------
 1 file changed, 9
insertions(+), 7 deletions(-) diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index 768ea1c7..8a20a5c7 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -129,6 +129,10 @@ def target( else: raise RuntimeError("Could not synchronize with main process") + if sys.platform == "win32": + logger.info("\t+ Disabline libuv on Windows") + os.environ["USE_LIBUV"] = "0" + try: elastic_agent_launcher = elastic_launch(config=config, entrypoint=entrypoint) outputs = elastic_agent_launcher(worker, worker_args, logger) @@ -155,19 +159,17 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l else: setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}") + if sys.platform == "win32": + logger.info("\t+ Disabline libuv on Windows") + os.environ["USE_LIBUV"] = "0" + if torch.cuda.is_available(): logger.info(f"\t+ Setting torch.distributed cuda device to {rank}") device = torch.device("cuda", rank) torch.cuda.set_device(device) - if sys.platform == "win32": - logger.info("\t+ Disabling libuv for Windows") - init_method = "env://?use_libuv=0" - else: - init_method = "env://" - logger.info("\t+ Initializing torch.distributed process group") - torch.distributed.init_process_group(init_method=init_method) + torch.distributed.init_process_group() try: report = worker(*worker_args) From b7a28487172e0d6523b661d822bc0dfd3fe6adf6 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 26 Nov 2024 15:57:15 +0100 Subject: [PATCH 05/16] just skip torchrun on windows --- optimum_benchmark/launchers/torchrun/launcher.py | 8 -------- tests/test_cli.py | 6 ++++++ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index 8a20a5c7..ee49e295 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -129,10 +129,6 @@ def target( else: raise RuntimeError("Could not synchronize with main process") - if sys.platform == "win32": - logger.info("\t+ Disabline libuv on Windows") - os.environ["USE_LIBUV"] = "0" - try: elastic_agent_launcher = elastic_launch(config=config, entrypoint=entrypoint) outputs = elastic_agent_launcher(worker, worker_args, logger) @@ -159,10 +155,6 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l else: setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}") - if sys.platform == "win32": - logger.info("\t+ Disabline libuv on Windows") - os.environ["USE_LIBUV"] = "0" - if torch.cuda.is_available(): logger.info(f"\t+ Setting torch.distributed cuda device to {rank}") device = torch.device("cuda", rank) diff --git a/tests/test_cli.py b/tests/test_cli.py index c18b26fb..3a510806 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -53,6 +53,9 @@ def test_cli_configs(config_name): @pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"]) def test_cli_exit_code_0(launcher): + if launcher == "torchrun" and sys.platform == "win32": + pytest.skip("torchrun is not supported on Windows") + args_0 = [ "optimum-benchmark", "--config-dir", @@ -73,6 +76,9 @@ def test_cli_exit_code_0(launcher): @pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"]) def test_cli_exit_code_1(launcher): + if launcher == "torchrun" and sys.platform == "win32": + pytest.skip("torchrun is not supported 
on Windows") + args_1 = [ "optimum-benchmark", "--config-dir", From b64a5146ca832cd05e04131a4f52ddee9f5e389a Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 26 Nov 2024 16:01:06 +0100 Subject: [PATCH 06/16] style --- optimum_benchmark/backends/pytorch/backend.py | 2 +- optimum_benchmark/launchers/torchrun/launcher.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index fcf522b5..bfbe9745 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -24,7 +24,7 @@ from .config import PyTorchConfig if is_deepspeed_available(): - import deepspeed + import deepspeed # type: ignore if is_torch_distributed_available(): import torch.distributed diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index ee49e295..10b45d4d 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -1,5 +1,4 @@ import os -import sys import traceback from contextlib import ExitStack from logging import Logger From 13bc8c0733c2234e14d3aa753e0ed84e8e22e67d Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 27 Nov 2024 10:30:51 +0100 Subject: [PATCH 07/16] remove dp tp distinction --- optimum_benchmark/backends/ipex/backend.py | 21 +-- .../backends/onnxruntime/backend.py | 16 +- .../backends/openvino/backend.py | 49 +++--- optimum_benchmark/backends/pytorch/backend.py | 45 ++---- .../backends/transformers_utils.py | 43 +++--- optimum_benchmark/benchmark/report.py | 26 +++- .../generators/task_generator.py | 2 - .../launchers/torchrun/launcher.py | 2 +- .../scenarios/energy_star/scenario.py | 56 +++---- .../scenarios/inference/config.py | 1 - .../scenarios/inference/scenario.py | 141 ++++++++---------- optimum_benchmark/trackers/energy.py | 16 +- optimum_benchmark/trackers/latency.py | 16 +- optimum_benchmark/trackers/memory.py | 15 +- 14 files changed, 181 insertions(+), 268 deletions(-) diff --git a/optimum_benchmark/backends/ipex/backend.py b/optimum_benchmark/backends/ipex/backend.py index 8939fdb0..b584ff6c 100644 --- a/optimum_benchmark/backends/ipex/backend.py +++ b/optimum_benchmark/backends/ipex/backend.py @@ -84,31 +84,14 @@ def automodel_kwargs(self) -> Dict[str, Any]: if self.config.torch_dtype is not None: kwargs["torch_dtype"] = getattr(torch, self.config.torch_dtype) - print(kwargs) - return kwargs @property - def is_dp_distributed(self) -> bool: + def split_between_processes(self) -> bool: return is_torch_distributed_available() and torch.distributed.is_initialized() - def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: - if input_shapes["batch_size"] % torch.distributed.get_world_size() != 0: - raise ValueError( - f"Batch size {input_shapes['batch_size']} must be divisible by " - f"data parallel world size {torch.distributed.get_world_size()}" - ) - # distributing batch size across processes - input_shapes["batch_size"] //= torch.distributed.get_world_size() - - # registering input shapes for usage during model reshaping - self.input_shapes = input_shapes - - return input_shapes - def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: + if self.split_between_processes: with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: inputs = process_inputs diff --git 
a/optimum_benchmark/backends/onnxruntime/backend.py b/optimum_benchmark/backends/onnxruntime/backend.py index 223da6dc..2fffcc36 100644 --- a/optimum_benchmark/backends/onnxruntime/backend.py +++ b/optimum_benchmark/backends/onnxruntime/backend.py @@ -280,20 +280,12 @@ def quantize_onnx_files(self) -> None: if self.pretrained_config is not None: self.pretrained_config.save_pretrained(self.quantized_model) - def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: - if input_shapes["batch_size"] % torch.distributed.get_world_size() != 0: - raise ValueError( - f"Batch size {input_shapes['batch_size']} must be divisible by " - f"data parallel world size {torch.distributed.get_world_size()}" - ) - # distributing batch size across processes - input_shapes["batch_size"] //= torch.distributed.get_world_size() - - return input_shapes + @property + def split_between_processes(self) -> bool: + return is_torch_distributed_available() and torch.distributed.is_initialized() def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: + if self.split_between_processes: with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: inputs = process_inputs diff --git a/optimum_benchmark/backends/openvino/backend.py b/optimum_benchmark/backends/openvino/backend.py index 9db49fb2..f0aa1925 100644 --- a/optimum_benchmark/backends/openvino/backend.py +++ b/optimum_benchmark/backends/openvino/backend.py @@ -82,7 +82,7 @@ def load(self) -> None: if self.config.reshape: static_shapes = { key: value - for key, value in {**self.input_shapes, **self.model_shapes}.items() + for key, value in self.model_shapes.items() if key in inspect.getfullargspec(self.pretrained_model.reshape).args } if ("sequence_length" in static_shapes) and ("height" in static_shapes) and ("width" in static_shapes): @@ -135,20 +135,6 @@ def _load_ovmodel_with_no_weights(self) -> None: self.config.export = original_export self.config.model = original_model - @property - def is_dp_distributed(self) -> bool: - return is_torch_distributed_available() and torch.distributed.is_initialized() - - @property - def ovmodel_kwargs(self) -> Dict[str, Any]: - kwargs = {} - - if self.config.task in TEXT_GENERATION_TASKS: - kwargs["use_cache"] = self.config.use_cache - kwargs["use_merged"] = self.config.use_merged - - return kwargs - def quantize_automodel(self) -> None: self.logger.info("\t+ Attempting quantization") self.quantized_model = f"{self.tmpdir.name}/quantized_model" @@ -181,23 +167,22 @@ def quantize_automodel(self) -> None: batch_size=1, ) - def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: - if input_shapes["batch_size"] % torch.distributed.get_world_size() != 0: - raise ValueError( - f"Batch size {input_shapes['batch_size']} must be divisible by " - f"data parallel world size {torch.distributed.get_world_size()}" - ) - # distributing batch size across processes - input_shapes["batch_size"] //= torch.distributed.get_world_size() + @property + def ovmodel_kwargs(self) -> Dict[str, Any]: + kwargs = {} - # registering input shapes for usage during model reshaping - self.input_shapes = input_shapes + if self.config.task in TEXT_GENERATION_TASKS: + kwargs["use_cache"] = self.config.use_cache + kwargs["use_merged"] = self.config.use_merged - return input_shapes + return kwargs + + @property + def split_between_processes(self) -> bool: + return 
is_torch_distributed_available() and torch.distributed.is_initialized() def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: + if self.split_between_processes: with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: inputs = process_inputs @@ -205,6 +190,14 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names: inputs.pop(key) + if "input_ids" in inputs: + self.model_shapes.update(dict(zip(["batch_size", "sequence_length"], inputs["input_ids"].shape))) + + if "pixel_values" in inputs: + self.model_shapes.update( + dict(zip(["batch_size", "num_channels", "height", "width"], inputs["pixel_values"].shape)) + ) + return inputs def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index bfbe9745..5052e148 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -27,7 +27,7 @@ import deepspeed # type: ignore if is_torch_distributed_available(): - import torch.distributed + import torch.distributed # type: ignore if is_zentorch_available(): import zentorch # type: ignore # noqa: F401 @@ -326,18 +326,6 @@ def process_quantization_config(self) -> None: else: raise ValueError(f"Quantization scheme {self.config.quantization_scheme} not recognized") - @property - def is_distributed(self) -> bool: - return is_torch_distributed_available() and torch.distributed.is_initialized() - - @property - def is_tp_distributed(self) -> bool: - return self.is_distributed and self.config.deepspeed_inference - - @property - def is_dp_distributed(self) -> bool: - return self.is_distributed and not self.config.deepspeed_inference - @property def is_quantized(self) -> bool: return self.config.quantization_scheme is not None or ( @@ -407,35 +395,26 @@ def automodel_kwargs(self) -> Dict[str, Any]: return kwargs - def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: - if input_shapes["batch_size"] % torch.distributed.get_world_size() != 0: - raise ValueError( - f"Batch size {input_shapes['batch_size']} must be divisible by " - f"data parallel world size {torch.distributed.get_world_size()}" - ) - # distributing batch size across processes - input_shapes["batch_size"] //= torch.distributed.get_world_size() - - if self.is_tp_distributed: - if torch.distributed.get_rank() != 0: - # zeroing throughput on other ranks - input_shapes["batch_size"] = 0 - - return input_shapes + @property + def split_between_processes(self) -> bool: + return ( + is_torch_distributed_available() + and torch.distributed.is_initialized() + and not self.config.deepspeed_inference + ) def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: + if self.split_between_processes: with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: inputs = process_inputs - if self.config.library == "timm": - inputs = {"x": inputs["pixel_values"]} - for key, value in inputs.items(): if isinstance(value, torch.Tensor): inputs[key] = value.to(self.config.device) + if self.config.library == "timm": + inputs = {"x": inputs["pixel_values"]} + return inputs @torch.inference_mode() diff --git a/optimum_benchmark/backends/transformers_utils.py 
b/optimum_benchmark/backends/transformers_utils.py index 3b38bc2c..efd2b8af 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional, Type, Union import torch import transformers @@ -7,6 +7,7 @@ AutoConfig, AutoFeatureExtractor, AutoImageProcessor, + AutoModel, AutoProcessor, AutoTokenizer, FeatureExtractionMixin, @@ -17,9 +18,7 @@ SpecialTokensMixin, ) -from ..import_utils import is_torch_available - -TASKS_TO_MODEL_LOADERS = { +TASKS_TO_AUTOMODEL_CLASS_NAMES = { # text processing "feature-extraction": "AutoModel", "fill-mask": "AutoModelForMaskedLM", @@ -57,34 +56,26 @@ "sentence-similarity": "feature-extraction", } -if is_torch_available(): - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {} - for task_name, model_loaders in TASKS_TO_MODEL_LOADERS.items(): - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task_name] = {} - - if isinstance(model_loaders, str): - model_loaders = (model_loaders,) - - for model_loader_name in model_loaders: - model_loader_class = getattr(transformers, model_loader_name, None) - if model_loader_class is not None: - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task_name].update( - model_loader_class._model_mapping._model_mapping - ) -else: - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {} - -def get_transformers_automodel_loader_for_task(task: str, model_type: Optional[str] = None): +def get_transformers_automodel_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: if task in SYNONYM_TASKS: task = SYNONYM_TASKS[task] - if model_type is not None: - model_loader_name = TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task][model_type] + if task not in TASKS_TO_AUTOMODEL_CLASS_NAMES: + raise ValueError(f"Task {task} not supported") + + if isinstance(TASKS_TO_AUTOMODEL_CLASS_NAMES[task], str): + return getattr(transformers, TASKS_TO_AUTOMODEL_CLASS_NAMES[task]) else: - model_loader_name = TASKS_TO_MODEL_LOADERS[task] + if model_type is None: + raise ValueError(f"Task {task} requires a model_type to be specified") + + for automodel_class_name in TASKS_TO_AUTOMODEL_CLASS_NAMES[task]: + automodel_class = getattr(transformers, automodel_class_name) + if model_type in automodel_class._model_mapping._model_mapping: + return automodel_class - return getattr(transformers, model_loader_name) + raise ValueError(f"Task {task} not supported for model type {model_type}") PretrainedProcessor = Union["FeatureExtractionMixin", "ImageProcessingMixin", "SpecialTokensMixin", "ProcessorMixin"] diff --git a/optimum_benchmark/benchmark/report.py b/optimum_benchmark/benchmark/report.py index c4b0602d..b9edd960 100644 --- a/optimum_benchmark/benchmark/report.py +++ b/optimum_benchmark/benchmark/report.py @@ -35,16 +35,26 @@ def __post_init__(self): self.efficiency = Efficiency(**self.efficiency) @staticmethod - def aggregate(measurements: List["TargetMeasurements"]) -> "TargetMeasurements": + def aggregate_across_processes(measurements: List["TargetMeasurements"]) -> "TargetMeasurements": assert len(measurements) > 0, "No measurements to aggregate" m0 = measurements[0] - memory = Memory.aggregate([m.memory for m in measurements]) if m0.memory is not None else None - latency = Latency.aggregate([m.latency for m in measurements]) if m0.latency is not None else None - throughput = Throughput.aggregate([m.throughput for m in measurements]) if m0.throughput is not None else None - energy = 
Energy.aggregate([m.energy for m in measurements]) if m0.energy is not None else None - efficiency = Efficiency.aggregate([m.efficiency for m in measurements]) if m0.efficiency is not None else None + memory = Memory.aggregate_across_processes([m.memory for m in measurements]) if m0.memory is not None else None + latency = ( + Latency.aggregate_across_processes([m.latency for m in measurements]) if m0.latency is not None else None + ) + throughput = ( + Throughput.aggregate_across_processes([m.throughput for m in measurements]) + if m0.throughput is not None + else None + ) + energy = Energy.aggregate_across_processes([m.energy for m in measurements]) if m0.energy is not None else None + efficiency = ( + Efficiency.aggregate_across_processes([m.efficiency for m in measurements]) + if m0.efficiency is not None + else None + ) return TargetMeasurements( memory=memory, latency=latency, throughput=throughput, energy=energy, efficiency=efficiency @@ -99,11 +109,11 @@ def __post_init__(self): setattr(self, target, TargetMeasurements(**getattr(self, target))) @classmethod - def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport": + def aggregate_across_processes(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport": aggregated_measurements = {} for target in reports[0].to_dict().keys(): measurements = [getattr(report, target) for report in reports] - aggregated_measurements[target] = TargetMeasurements.aggregate(measurements) + aggregated_measurements[target] = TargetMeasurements.aggregate_across_processes(measurements) return cls.from_dict(aggregated_measurements) diff --git a/optimum_benchmark/generators/task_generator.py b/optimum_benchmark/generators/task_generator.py index f11d21eb..96dbb2e5 100644 --- a/optimum_benchmark/generators/task_generator.py +++ b/optimum_benchmark/generators/task_generator.py @@ -445,6 +445,4 @@ def __call__(self): "image-text-to-text": ImageTextToTextGenerator, # diffusers pipelines tasks "text-to-image": PromptGenerator, - "stable-diffusion": PromptGenerator, - "stable-diffusion-xl": PromptGenerator, } diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index 10b45d4d..99d5ba12 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -100,7 +100,7 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) raise RuntimeError(f"Received an unexpected response from isolated process: {output}") self.logger.info("\t+ Aggregating reports from all rank processes") - report = BenchmarkReport.aggregate(reports) + report = BenchmarkReport.aggregate_across_processes(reports) return report diff --git a/optimum_benchmark/scenarios/energy_star/scenario.py b/optimum_benchmark/scenarios/energy_star/scenario.py index 39b12a04..8345cae0 100644 --- a/optimum_benchmark/scenarios/energy_star/scenario.py +++ b/optimum_benchmark/scenarios/energy_star/scenario.py @@ -38,7 +38,7 @@ PREPROCESS_EFFICIENCY_UNIT = "samples/kWh" FORWARD_EFFICIENCY_UNIT = "samples/kWh" -PREFILL_EFFICIENCY_UNIT = "tokens/kWh" +PREFILL_EFFICIENCY_UNIT = "samples/kWh" DECODE_EFFICIENCY_UNIT = "tokens/kWh" CALL_EFFICIENCY_UNIT = "images/kWh" @@ -50,9 +50,9 @@ def __init__(self, config: EnergyStarConfig) -> None: super().__init__(config) def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: - self.task = backend.config.task + self.backend = backend - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in 
TEXT_GENERATION_TASKS: self.logger.info("\t+ Updating Text Generation kwargs with default values") self.config.generate_kwargs = {**TEXT_GENERATION_DEFAULT_KWARGS, **self.config.generate_kwargs} self.prefill_kwargs = {**self.config.generate_kwargs, **TEXT_GENERATION_PREFILL_OVERRIDES} @@ -60,7 +60,7 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: self.report = BenchmarkReport.from_list( targets=["load_dataset", "preprocess_dataset", "load_model", "prefill", "decode"] ) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.logger.info("\t+ Updating Image Diffusion kwargs with default values") self.config.call_kwargs = {**IMAGE_DIFFUSION_DEFAULT_KWARGS, **self.config.call_kwargs} self.logger.info("\t+ Initializing Image Diffusion report") @@ -80,17 +80,18 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: ) self.run_dataset_loading_energy_tracking() - self.run_model_loading_energy_tracking(backend) self.run_dataset_preprocessing_energy_tracking(backend) self.logger.info("\t+ Preparing sample inputs for model warmup") - self.raw_sample_inputs = self.dataset[: self.config.input_shapes["batch_size"]] - self.prepared_sample_inputs = backend.prepare_inputs(self.raw_sample_inputs) + self.sample_inputs = self.dataset[: self.config.input_shapes["batch_size"]] + self.sample_inputs = backend.prepare_inputs(self.sample_inputs) + + self.run_model_loading_energy_tracking(backend) - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: self.warmup_text_generation(backend) self.run_text_generation_energy_tracking(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.warmup_image_diffusion(backend) self.run_image_diffusion_energy_tracking(backend) else: @@ -115,7 +116,7 @@ def run_dataset_preprocessing_energy_tracking(self, backend: Backend[BackendConf self.logger.info("\t+ Running dataset preprocessing energy tracking") with self.energy_tracker.track(file_prefix="preprocess_dataset"): - self.dataset = TASKS_TO_PREPROCESSORS[self.task]( + self.dataset = TASKS_TO_PREPROCESSORS[self.backend.config.task]( dataset=self.dataset, scenario_config=self.config, pretrained_config=backend.pretrained_config, @@ -144,24 +145,22 @@ def run_model_loading_energy_tracking(self, backend: Backend[BackendConfigT]): # Text Generation warmup def warmup_text_generation(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Warming up backend for Text Generation") - backend.generate(self.prepared_sample_inputs, self.config.generate_kwargs) + backend.generate(self.sample_inputs, self.config.generate_kwargs) for _ in range(self.config.warmup_runs): - backend.generate( - self.prepared_sample_inputs, {**self.config.generate_kwargs, **TEXT_GENERATION_WARMUP_OVERRIDES} - ) + backend.generate(self.sample_inputs, {**self.config.generate_kwargs, **TEXT_GENERATION_WARMUP_OVERRIDES}) # Image Diffusion warmup def warmup_image_diffusion(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Warming up backend for Image Diffusion") - backend.call(self.prepared_sample_inputs, self.config.call_kwargs) + backend.call(self.sample_inputs, self.config.call_kwargs) for _ in range(self.config.warmup_runs): - backend.call(self.prepared_sample_inputs, {**self.config.call_kwargs, **IMAGE_DIFFUSION_WARMUP_OVERRIDES}) + backend.call(self.sample_inputs, {**self.config.call_kwargs, **IMAGE_DIFFUSION_WARMUP_OVERRIDES}) # Inference warmup def 
warmup_inference(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Warming up backend for Inference") for _ in range(self.config.warmup_runs): - backend.forward(self.prepared_sample_inputs, self.config.forward_kwargs) + backend.forward(self.sample_inputs, self.config.forward_kwargs) # Text Generation energy tracking def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]): @@ -243,25 +242,8 @@ def dataset_forward_volume(self) -> int: # in samples return self.config.num_samples @property - def dataset_prefill_volume(self) -> int: # in tokens - prefill_volume = 0 - - for sample in self.dataset: - if "input_ids" in sample.keys(): - # text/image-text/video-image-text conditioned generation - prefill_volume += self.raw_sample_inputs["input_ids"].numel() - else: - # image/audio/other conditioned generation (1 bos token) - prefill_volume += 1 - - return prefill_volume - - @property - def dataset_per_token_volume(self) -> int: # in tokens - return ( - self.config.num_samples - * self.config.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated - ) + def dataset_prefill_volume(self) -> int: # in samples + return self.config.num_samples @property def dataset_decode_volume(self) -> int: # in tokens @@ -273,7 +255,7 @@ def dataset_decode_volume(self) -> int: # in tokens @property def dataset_call_volume(self) -> int: # in images - if self.task == "text-to-image": + if self.backend.config.task == "text-to-image": return self.config.num_samples * self.config.call_kwargs["num_images_per_prompt"] else: return self.config.num_samples diff --git a/optimum_benchmark/scenarios/inference/config.py b/optimum_benchmark/scenarios/inference/config.py index 57d482ab..d86962eb 100644 --- a/optimum_benchmark/scenarios/inference/config.py +++ b/optimum_benchmark/scenarios/inference/config.py @@ -9,7 +9,6 @@ INPUT_SHAPES = { "batch_size": 2, - "sequence_length": 16, } diff --git a/optimum_benchmark/scenarios/inference/scenario.py b/optimum_benchmark/scenarios/inference/scenario.py index 2f0ac8e7..e05cb7b9 100644 --- a/optimum_benchmark/scenarios/inference/scenario.py +++ b/optimum_benchmark/scenarios/inference/scenario.py @@ -40,13 +40,17 @@ "num_inference_steps": 2, } -TEXT_GENERATION_THROUGHPUT_UNIT = "tokens/s" -IMAGE_DIFFUSION_THROUGHPUT_UNIT = "images/s" -INFERENCE_THROUGHPUT_UNIT = "samples/s" -TEXT_GENERATION_EFFICIENCY_UNIT = "tokens/kWh" -IMAGE_DIFFUSION_EFFICIENCY_UNIT = "images/kWh" -INFERENCE_EFFICIENCY_UNIT = "samples/kWh" +FORWARD_THROUGHPUT_UNIT = "samples/s" +PREFILL_THROUGHPUT_UNIT = "samples/s" +DECODE_THROUGHPUT_UNIT = "tokens/s" +CALL_THROUGHPUT_UNIT = "images/s" + + +FORWARD_EFFICIENCY_UNIT = "samples/kWh" +PREFILL_EFFICIENCY_UNIT = "samples/kWh" +DECODE_EFFICIENCY_UNIT = "tokens/kWh" +CALL_EFFICIENCY_UNIT = "images/kWh" class InferenceScenario(Scenario[InferenceConfig]): @@ -56,77 +60,71 @@ def __init__(self, config: InferenceConfig) -> None: super().__init__(config) def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: - self.task = backend.config.task + self.backend = backend - self.logger.info("\t+ Creating input generator") - self.input_generator = InputGenerator( - task=self.task, - input_shapes=self.config.input_shapes, - model_shapes=backend.model_shapes, - model_type=backend.config.model_type, - ) - - if self.task in TEXT_GENERATION_TASKS: - self.logger.info("\t+ Generating Text Generation inputs") - self.inputs = self.input_generator() + if self.backend.config.task in TEXT_GENERATION_TASKS: 
self.logger.info("\t+ Updating Text Generation kwargs with default values") self.config.generate_kwargs = {**TEXT_GENERATION_DEFAULT_KWARGS, **self.config.generate_kwargs} self.logger.info("\t+ Initializing Text Generation report") self.report = BenchmarkReport.from_list(targets=["load", "prefill", "decode", "per_token"]) - elif self.task in IMAGE_DIFFUSION_TASKS: - self.logger.info("\t+ Generating Image Diffusion inputs") - self.inputs = self.input_generator() + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.logger.info("\t+ Updating Image Diffusion kwargs with default values") self.config.call_kwargs = {**IMAGE_DIFFUSION_DEFAULT_KWARGS, **self.config.call_kwargs} self.logger.info("\t+ Initializing Image Diffusion report") self.report = BenchmarkReport.from_list(targets=["load", "call"]) else: - self.logger.info("\t+ Generating Inference inputs") - self.inputs = self.input_generator() self.logger.info("\t+ Initializing Inference report") self.report = BenchmarkReport.from_list(targets=["load", "forward"]) - self.logger.info("\t+ Preparing input shapes for Inference") - self.config.input_shapes = backend.prepare_input_shapes(input_shapes=self.config.input_shapes) + self.logger.info("\t+ Creating input generator") + self.input_generator = InputGenerator( + task=self.backend.config.task, + model_shapes=backend.model_shapes, + input_shapes=self.config.input_shapes, + model_type=backend.config.model_type, + ) - self.run_model_loading_tracking(backend) + self.logger.info("\t+ Generating inputs") + self.inputs = self.input_generator() self.logger.info("\t+ Preparing inputs for Inference") self.inputs = backend.prepare_inputs(inputs=self.inputs) + self.run_model_loading_tracking(backend) + if self.config.latency or self.config.energy: # latency and energy are metrics that require some warmup if self.config.warmup_runs > 0: - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: self.warmup_text_generation(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.warmup_image_diffusion(backend) else: self.warmup_inference(backend) if self.config.latency: - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: if backend.config.name in PER_TOKEN_BACKENDS: self.run_per_token_text_generation_latency_tracking(backend) else: self.run_text_generation_latency_tracking(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.run_image_diffusion_latency_tracking(backend) else: self.run_latency_inference_tracking(backend) if self.config.memory: - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: self.run_text_generation_memory_tracking(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.run_image_diffusion_memory_tracking(backend) else: self.run_inference_memory_tracking(backend) if self.config.energy: - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: self.run_text_generation_energy_tracking(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.run_image_diffusion_energy_tracking(backend) else: self.run_inference_energy_tracking(backend) @@ -178,42 +176,42 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]): ## Memory tracking def 
run_text_generation_memory_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running Text Generation memory tracking") - self.memory_tracker = MemoryTracker( + memory_tracker = MemoryTracker( backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids ) prefill_kwargs = {**self.config.generate_kwargs, **TEXT_GENERATION_PREFILL_OVERRIDES} - with self.memory_tracker.track(): + with memory_tracker.track(): _ = backend.prefill(self.inputs, prefill_kwargs) - self.report.prefill.memory = self.memory_tracker.get_max_memory() + self.report.prefill.memory = memory_tracker.get_max_memory() - with self.memory_tracker.track(): + with memory_tracker.track(): _ = backend.generate(self.inputs, self.config.generate_kwargs) - self.report.decode.memory = self.memory_tracker.get_max_memory() + self.report.decode.memory = memory_tracker.get_max_memory() def run_image_diffusion_memory_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running Image Diffusion memory tracking") - self.memory_tracker = MemoryTracker( + memory_tracker = MemoryTracker( backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids ) - with self.memory_tracker.track(): + with memory_tracker.track(): _ = backend.call(self.inputs, self.config.call_kwargs) - self.report.call.memory = self.memory_tracker.get_max_memory() + self.report.call.memory = memory_tracker.get_max_memory() def run_inference_memory_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running Inference memory tracking") - self.memory_tracker = MemoryTracker( + memory_tracker = MemoryTracker( backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids ) - with self.memory_tracker.track(): + with memory_tracker.track(): _ = backend.forward(self.inputs, self.config.forward_kwargs) - self.report.forward.memory = self.memory_tracker.get_max_memory() + self.report.forward.memory = memory_tracker.get_max_memory() ## Latency tracking def run_per_token_text_generation_latency_tracking(self, backend: Backend[BackendConfigT]): @@ -229,7 +227,6 @@ def run_per_token_text_generation_latency_tracking(self, backend: Backend[Backen prefill_latency = latency_tracker.get_prefill_latency() decode_latency = latency_tracker.get_decode_latency() - per_token_volume = self.atomic_per_token_volume prefill_volume = self.atomic_prefill_volume decode_volume = self.atomic_decode_volume @@ -237,14 +234,12 @@ def run_per_token_text_generation_latency_tracking(self, backend: Backend[Backen self.report.prefill.latency = prefill_latency self.report.decode.latency = decode_latency - self.report.per_token.throughput = Throughput.from_latency( - per_token_latency, per_token_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT - ) + # we don't register a per-token throughput, as it's a confusing metric and the same as the decode throughput self.report.prefill.throughput = Throughput.from_latency( - prefill_latency, prefill_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT + prefill_latency, prefill_volume, unit=PREFILL_THROUGHPUT_UNIT ) self.report.decode.throughput = Throughput.from_latency( - decode_latency, decode_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT + decode_latency, decode_volume, unit=DECODE_THROUGHPUT_UNIT ) def run_text_generation_latency_tracking(self, backend: Backend[BackendConfigT]): @@ -261,7 +256,7 @@ def run_text_generation_latency_tracking(self, backend: Backend[BackendConfigT]) self.report.prefill.latency = prefill_latency 
self.report.prefill.throughput = Throughput.from_latency( - prefill_latency, prefill_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT + prefill_latency, prefill_volume, unit=PREFILL_THROUGHPUT_UNIT ) latency_tracker.reset() @@ -275,7 +270,7 @@ def run_text_generation_latency_tracking(self, backend: Backend[BackendConfigT]) self.report.decode.latency = decode_latency self.report.decode.throughput = Throughput.from_latency( - decode_latency, decode_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT + decode_latency, decode_volume, unit=DECODE_THROUGHPUT_UNIT ) def run_image_diffusion_latency_tracking(self, backend: Backend[BackendConfigT]): @@ -290,9 +285,7 @@ def run_image_diffusion_latency_tracking(self, backend: Backend[BackendConfigT]) call_volume = self.atomic_call_volume self.report.call.latency = call_latency - self.report.call.throughput = Throughput.from_latency( - call_latency, call_volume, unit=IMAGE_DIFFUSION_THROUGHPUT_UNIT - ) + self.report.call.throughput = Throughput.from_latency(call_latency, call_volume, unit=CALL_THROUGHPUT_UNIT) def run_latency_inference_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running Inference latency tracking") @@ -307,7 +300,7 @@ def run_latency_inference_tracking(self, backend: Backend[BackendConfigT]): self.report.forward.latency = forward_latency self.report.forward.throughput = Throughput.from_latency( - forward_latency, forward_volume, unit=INFERENCE_THROUGHPUT_UNIT + forward_latency, forward_volume, unit=FORWARD_THROUGHPUT_UNIT ) ## Energy tracking @@ -333,7 +326,7 @@ def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.prefill.energy = prefill_energy self.report.prefill.efficiency = Efficiency.from_energy( - prefill_energy, prefill_volume, unit=TEXT_GENERATION_EFFICIENCY_UNIT + prefill_energy, prefill_volume, unit=PREFILL_EFFICIENCY_UNIT ) count = 0 @@ -352,7 +345,7 @@ def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.decode.energy = decode_energy self.report.decode.efficiency = Efficiency.from_energy( - decode_energy, decode_volume, unit=TEXT_GENERATION_EFFICIENCY_UNIT + decode_energy, decode_volume, unit=DECODE_EFFICIENCY_UNIT ) def run_image_diffusion_energy_tracking(self, backend: Backend[BackendConfigT]): @@ -375,9 +368,7 @@ def run_image_diffusion_energy_tracking(self, backend: Backend[BackendConfigT]): call_volume = self.atomic_call_volume self.report.call.energy = call_energy - self.report.call.efficiency = Efficiency.from_energy( - call_energy, call_volume, unit=IMAGE_DIFFUSION_EFFICIENCY_UNIT - ) + self.report.call.efficiency = Efficiency.from_energy(call_energy, call_volume, unit=CALL_EFFICIENCY_UNIT) def run_inference_energy_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running energy tracking") @@ -400,31 +391,19 @@ def run_inference_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.forward.energy = forward_energy self.report.forward.efficiency = Efficiency.from_energy( - forward_energy, forward_volume, unit=INFERENCE_EFFICIENCY_UNIT + forward_energy, forward_volume, unit=FORWARD_EFFICIENCY_UNIT ) @property - def atomic_forward_volume(self) -> int: # in samples + def atomic_forward_volume(self) -> int: # in terms of processed samples return self.config.input_shapes["batch_size"] @property - def atomic_prefill_volume(self) -> int: # in tokens - if {"input_ids", "prompt", "prompts"} & set(self.inputs.keys()): - # text conditioned generation (sequence_length tokens) - return 
self.config.input_shapes["batch_size"] * self.config.input_shapes["sequence_length"] - else: - # image/audio conditioned generation (1 bos token) - return self.config.input_shapes["batch_size"] - - @property - def atomic_per_token_volume(self) -> int: # in tokens - return ( - self.config.input_shapes["batch_size"] - * self.config.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated - ) + def atomic_prefill_volume(self) -> int: # in terms of processed samples + return self.config.input_shapes["batch_size"] @property - def atomic_decode_volume(self) -> int: # in tokens + def atomic_decode_volume(self) -> int: # in terms of output/generated tokens return ( self.config.input_shapes["batch_size"] * self.config.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated @@ -432,8 +411,8 @@ def atomic_decode_volume(self) -> int: # in tokens ) @property - def atomic_call_volume(self) -> int: # in images - if self.task == "text-to-image": + def atomic_call_volume(self) -> int: # in terms of output images + if self.backend.config.task == "text-to-image": return self.config.input_shapes["batch_size"] * self.config.call_kwargs["num_images_per_prompt"] else: return self.config.input_shapes["batch_size"] diff --git a/optimum_benchmark/trackers/energy.py b/optimum_benchmark/trackers/energy.py index 3586809f..427c4d40 100644 --- a/optimum_benchmark/trackers/energy.py +++ b/optimum_benchmark/trackers/energy.py @@ -61,19 +61,20 @@ def __truediv__(self, scalar: float) -> "Energy": ) @staticmethod - def aggregate(energies: List["Energy"]) -> "Energy": - if len(energies) == 0 or all(energy is None for energy in energies): - return None + def aggregate_across_processes(energies: List[Optional["Energy"]]) -> Optional["Energy"]: + if len(energies) == 0: + raise ValueError("No energy measurements to aggregate") elif any(energy is None for energy in energies): raise ValueError("Some energy measurements are missing") # since measurements are machine-level, we just take the average + total = sum(energy.total for energy in energies) / len(energies) cpu = sum(energy.cpu for energy in energies) / len(energies) gpu = sum(energy.gpu for energy in energies) / len(energies) ram = sum(energy.ram for energy in energies) / len(energies) - total = sum(energy.total for energy in energies) / len(energies) + unit = energies[0].unit - return Energy(cpu=cpu, gpu=gpu, ram=ram, total=total, unit=ENERGY_UNIT) + return Energy(cpu=cpu, gpu=gpu, ram=ram, total=total, unit=unit) def to_plain_text(self) -> str: plain_text = "" @@ -109,14 +110,15 @@ class Efficiency: value: float @staticmethod - def aggregate(efficiencies: List["Efficiency"]) -> "Efficiency": + def aggregate_across_processes(efficiencies: List[Optional["Efficiency"]]) -> Optional["Efficiency"]: if len(efficiencies) == 0: raise ValueError("No efficiency measurements to aggregate") elif any(efficiency is None for efficiency in efficiencies): raise ValueError("Some efficiency measurements are None") - unit = efficiencies[0].unit + # since measurements are machine-level, we just take the average value = sum(efficiency.value for efficiency in efficiencies) / len(efficiencies) + unit = efficiencies[0].unit return Efficiency(value=value, unit=unit) diff --git a/optimum_benchmark/trackers/latency.py b/optimum_benchmark/trackers/latency.py index 908108cb..de4ab341 100644 --- a/optimum_benchmark/trackers/latency.py +++ b/optimum_benchmark/trackers/latency.py @@ -53,14 +53,17 @@ def __sub__(self, latency: "Latency") -> 
"Latency": return Latency.from_values(values=latencies, unit=self.unit) @staticmethod - def aggregate(latencies: List["Latency"]) -> "Latency": - if len(latencies) == 0 or all(latency is None for latency in latencies): - return None + def aggregate_across_processes(latencies: List["Latency"]) -> "Latency": + if len(latencies) == 0: + raise ValueError("No latency measurements to aggregate") elif any(latency is None for latency in latencies): raise ValueError("Some latency measurements are missing") - unit = latencies[0].unit + # we combine the lists of latencies and statistics are then computed on this list values = sum((lat.values for lat in latencies), []) + + unit = latencies[0].unit + return Latency.from_values(values=values, unit=unit) @staticmethod @@ -123,14 +126,15 @@ class Throughput: value: float @staticmethod - def aggregate(throughputs: List["Throughput"]) -> "Throughput": + def aggregate_across_processes(throughputs: List[Optional["Throughput"]]) -> Optional["Throughput"]: if len(throughputs) == 0: raise ValueError("No throughput measurements to aggregate") elif any(throughput is None for throughput in throughputs): raise ValueError("Some throughput measurements are missing") + # we compute throughputs on the whole input level so we just take the average + value = sum(throughput.value for throughput in throughputs) / len(throughputs) unit = throughputs[0].unit - value = sum(throughput.value for throughput in throughputs) return Throughput(value=value, unit=unit) diff --git a/optimum_benchmark/trackers/memory.py b/optimum_benchmark/trackers/memory.py index 5e9359b1..47edf71e 100644 --- a/optimum_benchmark/trackers/memory.py +++ b/optimum_benchmark/trackers/memory.py @@ -52,16 +52,14 @@ class Memory: max_allocated: Optional[float] = None @staticmethod - def aggregate(memories: List["Memory"]) -> "Memory": + def aggregate_across_processes(memories: List["Memory"]) -> "Memory": if len(memories) == 0: raise ValueError("No memory measurements to aggregate") elif any(memory is None for memory in memories): raise ValueError("Some memory measurements are missing") - unit = memories[0].unit - - # process specific measurements - max_ram = sum(memory.max_ram for memory in memories) + # ram, reserved, allocated, and process_vram measurements are process-specific so they are summed + max_ram = sum(memory.max_ram for memory in memories) if memories[0].max_ram is not None else None max_reserved = sum(memory.max_reserved for memory in memories) if memories[0].max_reserved is not None else None max_allocated = ( sum(memory.max_allocated for memory in memories) if memories[0].max_allocated is not None else None @@ -69,10 +67,13 @@ def aggregate(memories: List["Memory"]) -> "Memory": max_process_vram = ( sum(memory.max_process_vram for memory in memories) if memories[0].max_process_vram is not None else None ) - # machine level measurements + # global_vram is not process-specific so we take the average max_global_vram = ( - max(memory.max_global_vram for memory in memories) if memories[0].max_global_vram is not None else None + sum(memory.max_global_vram for memory in memories) / len(memories) + if memories[0].max_global_vram is not None + else None ) + unit = memories[0].unit return Memory( unit=unit, From 712d8517ab801e3674339834c94df34feb558564 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 27 Nov 2024 10:32:59 +0100 Subject: [PATCH 08/16] fix style --- examples/pytorch_llama.py | 2 +- optimum_benchmark/backends/pytorch/backend.py | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/examples/pytorch_llama.py b/examples/pytorch_llama.py index bcaaedcd..fe732bfa 100644 --- a/examples/pytorch_llama.py +++ b/examples/pytorch_llama.py @@ -33,7 +33,7 @@ "torch_dtype": "bfloat16", "quantization_scheme": "torchao", "quantization_config": {"quant_type": "int4_weight_only", "group_size": 128}, - } + }, } diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index cf96c5a6..651e6d12 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -11,12 +11,12 @@ AwqConfig, BitsAndBytesConfig, GPTQConfig, + TorchAoConfig, Trainer, TrainerCallback, TrainerState, TrainingArguments, ) -from transformers import TorchAoConfig from ...import_utils import is_deepspeed_available, is_torch_distributed_available, is_zentorch_available from ..base import Backend From 88b86faae12478eb22d730d2886822bdb0286e5b Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 27 Nov 2024 15:42:55 +0100 Subject: [PATCH 09/16] fix --- optimum_benchmark/backends/base.py | 12 +- optimum_benchmark/backends/diffusers_utils.py | 42 +--- optimum_benchmark/backends/timm_utils.py | 14 +- .../backends/transformers_utils.py | 48 +--- .../scenarios/inference/scenario.py | 26 ++- optimum_benchmark/task_utils.py | 215 +++++++++++++----- tests/configs/_st_bert_.yaml | 3 + ...yaml => cpu_inference_py_txi_st_bert.yaml} | 4 +- ...aml => cuda_inference_py_txi_st_bert.yaml} | 4 +- tests/test_api.py | 20 +- 10 files changed, 214 insertions(+), 174 deletions(-) create mode 100644 tests/configs/_st_bert_.yaml rename tests/configs/{cpu_inference_py_txi_bert.yaml => cpu_inference_py_txi_st_bert.yaml} (77%) rename tests/configs/{cuda_inference_py_txi_bert.yaml => cuda_inference_py_txi_st_bert.yaml} (77%) diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py index 1c039163..8488b457 100644 --- a/optimum_benchmark/backends/base.py +++ b/optimum_benchmark/backends/base.py @@ -13,14 +13,14 @@ from .config import BackendConfigT from .diffusers_utils import ( extract_diffusers_shapes_from_model, - get_diffusers_automodel_loader_for_task, + get_diffusers_auto_pipeline_class_for_task, get_diffusers_pretrained_config, ) -from .timm_utils import extract_timm_shapes_from_config, get_timm_automodel_loader, get_timm_pretrained_config +from .timm_utils import extract_timm_shapes_from_config, get_timm_model_creator, get_timm_pretrained_config from .transformers_utils import ( PretrainedProcessor, extract_transformers_shapes_from_artifacts, - get_transformers_automodel_loader_for_task, + get_transformers_auto_model_class_for_task, get_transformers_generation_config, get_transformers_pretrained_config, get_transformers_pretrained_processor, @@ -56,7 +56,7 @@ def __init__(self, config: BackendConfigT): self.logger.info("\t+ Benchmarking a Diffusers pipeline") self.pretrained_config = get_diffusers_pretrained_config(self.config.model, **self.config.model_kwargs) self.model_shapes = extract_diffusers_shapes_from_model(self.config.model, **self.config.model_kwargs) - self.automodel_loader = get_diffusers_automodel_loader_for_task(self.config.task) + self.automodel_loader = get_diffusers_auto_pipeline_class_for_task(self.config.task) self.pretrained_processor = None self.generation_config = None @@ -64,7 +64,7 @@ def __init__(self, config: BackendConfigT): self.logger.info("\t+ Benchmarking a Timm model") self.pretrained_config = get_timm_pretrained_config(self.config.model) self.model_shapes 
= extract_timm_shapes_from_config(self.pretrained_config) - self.automodel_loader = get_timm_automodel_loader() + self.automodel_loader = get_timm_model_creator() self.pretrained_processor = None self.generation_config = None @@ -78,7 +78,7 @@ def __init__(self, config: BackendConfigT): else: self.logger.info("\t+ Benchmarking a Transformers model") - self.automodel_loader = get_transformers_automodel_loader_for_task(self.config.task, self.config.model_type) + self.automodel_loader = get_transformers_auto_model_class_for_task(self.config.task, self.config.model_type) self.generation_config = get_transformers_generation_config(self.config.model, **self.config.model_kwargs) self.pretrained_config = get_transformers_pretrained_config(self.config.model, **self.config.model_kwargs) self.pretrained_processor = get_transformers_pretrained_processor( diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index 43f0757b..ef1b4a59 100644 --- a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -9,33 +9,16 @@ import diffusers from diffusers import DiffusionPipeline - if hasattr(diffusers, "pipelines") and hasattr(diffusers.pipelines, "auto_pipeline"): - from diffusers.pipelines.auto_pipeline import ( - AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, - AUTO_INPAINT_PIPELINES_MAPPING, - AUTO_TEXT2IMAGE_PIPELINES_MAPPING, - ) - - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = { - "inpainting": AUTO_INPAINT_PIPELINES_MAPPING.copy(), - "text-to-image": AUTO_TEXT2IMAGE_PIPELINES_MAPPING.copy(), - "image-to-image": AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.copy(), - } - - for task_name, model_mapping in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES.items(): - for model_type, model_class in model_mapping.items(): - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task_name][model_type] = model_class.__name__ - else: - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {} -else: - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {} +def get_diffusers_auto_pipeline_class_for_task(task: str): + from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES -TASKS_TO_MODEL_LOADERS = { - "inpainting": "AutoPipelineForInpainting", - "text-to-image": "AutoPipelineForText2Image", - "image-to-image": "AutoPipelineForImage2Image", -} + if not is_diffusers_available(): + raise ImportError("diffusers is not available. Please, pip install diffusers.") + + model_loader_name = TASKS_TO_AUTO_PIPELINE_CLASS_NAMES.get(task, None) + model_loader_class = getattr(diffusers, model_loader_name) + return model_loader_class def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]: @@ -85,12 +68,3 @@ def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]: shapes["width"] = -1 return shapes - - -def get_diffusers_automodel_loader_for_task(task: str): - if not is_diffusers_available(): - raise ImportError("diffusers is not available. Please, pip install diffusers.") - - model_loader_name = TASKS_TO_MODEL_LOADERS[task] - model_loader_class = getattr(diffusers, model_loader_name) - return model_loader_class diff --git a/optimum_benchmark/backends/timm_utils.py b/optimum_benchmark/backends/timm_utils.py index dbaf36fd..4cb3cd1c 100644 --- a/optimum_benchmark/backends/timm_utils.py +++ b/optimum_benchmark/backends/timm_utils.py @@ -10,6 +10,13 @@ from timm.models import get_pretrained_cfg, load_model_config_from_hf, parse_model_name +def get_timm_model_creator(): + if not is_timm_available(): + raise ImportError("timm is not available. 
Please, pip install timm.") + + return create_model + + def get_timm_pretrained_config(model_name: str) -> PretrainedConfig: if not is_timm_available(): raise ImportError("timm is not available. Please, pip install timm.") @@ -71,10 +78,3 @@ def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]: warnings.warn("Could not extract shapes [num_channels, height, width] from timm model config.") return shapes - - -def get_timm_automodel_loader(): - if not is_timm_available(): - raise ImportError("timm is not available. Please, pip install timm.") - - return create_model diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index efd2b8af..7226dd7c 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -18,59 +18,23 @@ SpecialTokensMixin, ) -TASKS_TO_AUTOMODEL_CLASS_NAMES = { - # text processing - "feature-extraction": "AutoModel", - "fill-mask": "AutoModelForMaskedLM", - "multiple-choice": "AutoModelForMultipleChoice", - "question-answering": "AutoModelForQuestionAnswering", - "token-classification": "AutoModelForTokenClassification", - "text-classification": "AutoModelForSequenceClassification", - # audio processing - "audio-xvector": "AutoModelForAudioXVector", - "text-to-audio": "AutoModelForTextToSpectrogram", - "audio-classification": "AutoModelForAudioClassification", - "audio-frame-classification": "AutoModelForAudioFrameClassification", - # image processing - "mask-generation": "AutoModel", - "image-to-image": "AutoModelForImageToImage", - "masked-im": "AutoModelForMaskedImageModeling", - "object-detection": "AutoModelForObjectDetection", - "depth-estimation": "AutoModelForDepthEstimation", - "image-segmentation": "AutoModelForImageSegmentation", - "image-classification": "AutoModelForImageClassification", - "semantic-segmentation": "AutoModelForSemanticSegmentation", - "zero-shot-object-detection": "AutoModelForZeroShotObjectDetection", - "zero-shot-image-classification": "AutoModelForZeroShotImageClassification", - # text generation - "image-to-text": "AutoModelForVision2Seq", - "text-generation": "AutoModelForCausalLM", - "text2text-generation": "AutoModelForSeq2SeqLM", - "image-text-to-text": "AutoModelForImageTextToText", - "visual-question-answering": "AutoModelForVisualQuestionAnswering", - "automatic-speech-recognition": ("AutoModelForSpeechSeq2Seq", "AutoModelForCTC"), -} - -SYNONYM_TASKS = { - "summarization": "text2text-generation", - "sentence-similarity": "feature-extraction", -} +def get_transformers_auto_model_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: + from ..task_utils import SYNONYM_TASKS, TASKS_TO_AUTO_MODEL_CLASS_NAMES -def get_transformers_automodel_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: if task in SYNONYM_TASKS: task = SYNONYM_TASKS[task] - if task not in TASKS_TO_AUTOMODEL_CLASS_NAMES: + if task not in TASKS_TO_AUTO_MODEL_CLASS_NAMES: raise ValueError(f"Task {task} not supported") - if isinstance(TASKS_TO_AUTOMODEL_CLASS_NAMES[task], str): - return getattr(transformers, TASKS_TO_AUTOMODEL_CLASS_NAMES[task]) + if isinstance(TASKS_TO_AUTO_MODEL_CLASS_NAMES[task], str): + return getattr(transformers, TASKS_TO_AUTO_MODEL_CLASS_NAMES[task]) else: if model_type is None: raise ValueError(f"Task {task} requires a model_type to be specified") - for automodel_class_name in TASKS_TO_AUTOMODEL_CLASS_NAMES[task]: + for automodel_class_name in 
TASKS_TO_AUTO_MODEL_CLASS_NAMES[task]: automodel_class = getattr(transformers, automodel_class_name) if model_type in automodel_class._model_mapping._model_mapping: return automodel_class diff --git a/optimum_benchmark/scenarios/inference/scenario.py b/optimum_benchmark/scenarios/inference/scenario.py index e05cb7b9..c7faffed 100644 --- a/optimum_benchmark/scenarios/inference/scenario.py +++ b/optimum_benchmark/scenarios/inference/scenario.py @@ -66,15 +66,17 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: self.logger.info("\t+ Updating Text Generation kwargs with default values") self.config.generate_kwargs = {**TEXT_GENERATION_DEFAULT_KWARGS, **self.config.generate_kwargs} self.logger.info("\t+ Initializing Text Generation report") - self.report = BenchmarkReport.from_list(targets=["load", "prefill", "decode", "per_token"]) + self.report = BenchmarkReport.from_list(targets=["load_model", "prefill", "decode", "per_token"]) elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.logger.info("\t+ Updating Image Diffusion kwargs with default values") self.config.call_kwargs = {**IMAGE_DIFFUSION_DEFAULT_KWARGS, **self.config.call_kwargs} self.logger.info("\t+ Initializing Image Diffusion report") - self.report = BenchmarkReport.from_list(targets=["load", "call"]) + self.report = BenchmarkReport.from_list(targets=["load_model", "call"]) else: self.logger.info("\t+ Initializing Inference report") - self.report = BenchmarkReport.from_list(targets=["load", "forward"]) + self.report = BenchmarkReport.from_list(targets=["load_model", "forward"]) + + self.run_model_loading_tracking(backend) self.logger.info("\t+ Creating input generator") self.input_generator = InputGenerator( @@ -83,15 +85,11 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: input_shapes=self.config.input_shapes, model_type=backend.config.model_type, ) - self.logger.info("\t+ Generating inputs") self.inputs = self.input_generator() - - self.logger.info("\t+ Preparing inputs for Inference") + self.logger.info("\t+ Preparing inputs for backend") self.inputs = backend.prepare_inputs(inputs=self.inputs) - self.run_model_loading_tracking(backend) - if self.config.latency or self.config.energy: # latency and energy are metrics that require some warmup if self.config.warmup_runs > 0: @@ -159,8 +157,14 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]): ) if self.config.latency: latency_tracker = LatencyTracker(backend=backend.config.name, device=backend.config.device) + if self.config.energy: + energy_tracker = EnergyTracker( + backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids + ) with ExitStack() as context_stack: + if self.config.energy: + context_stack.enter_context(energy_tracker.track()) if self.config.memory: context_stack.enter_context(memory_tracker.track()) if self.config.latency: @@ -169,9 +173,11 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]): backend.load() if self.config.latency: - self.report.load.latency = latency_tracker.get_latency() + self.report.load_model.latency = latency_tracker.get_latency() if self.config.memory: - self.report.load.memory = memory_tracker.get_max_memory() + self.report.load_model.memory = memory_tracker.get_max_memory() + if self.config.energy: + self.report.load_model.energy = energy_tracker.get_energy() ## Memory tracking def run_text_generation_memory_tracking(self, backend: Backend[BackendConfigT]): diff --git a/optimum_benchmark/task_utils.py 
b/optimum_benchmark/task_utils.py index 0a2a98c2..1821b47d 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -5,38 +5,94 @@ import huggingface_hub -from .backends.diffusers_utils import ( - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES as DIFFUSERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES, -) -from .backends.diffusers_utils import ( - get_diffusers_pretrained_config, -) +from .backends.diffusers_utils import get_diffusers_pretrained_config from .backends.timm_utils import get_timm_pretrained_config -from .backends.transformers_utils import ( - TASKS_TO_MODEL_LOADERS, - get_transformers_pretrained_config, -) -from .backends.transformers_utils import ( - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES as TRANSFORMERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES, -) - -_SYNONYM_TASK_MAP = { - "masked-lm": "fill-mask", - "causal-lm": "text-generation", - "default": "feature-extraction", - "vision2seq-lm": "image-to-text", - "text-to-speech": "text-to-audio", - "seq2seq-lm": "text2text-generation", - "translation": "text2text-generation", - "summarization": "text2text-generation", - "mask-generation": "feature-extraction", - "audio-ctc": "automatic-speech-recognition", - "sentence-similarity": "feature-extraction", - "speech2seq-lm": "automatic-speech-recognition", - "sequence-classification": "text-classification", - "zero-shot-classification": "text-classification", +from .backends.transformers_utils import get_transformers_pretrained_config +from .import_utils import is_diffusers_available, is_torch_available + +TASKS_TO_AUTO_MODEL_CLASS_NAMES = { + # text processing + "feature-extraction": "AutoModel", + "fill-mask": "AutoModelForMaskedLM", + "multiple-choice": "AutoModelForMultipleChoice", + "question-answering": "AutoModelForQuestionAnswering", + "token-classification": "AutoModelForTokenClassification", + "text-classification": "AutoModelForSequenceClassification", + # audio processing + "audio-xvector": "AutoModelForAudioXVector", + "text-to-audio": "AutoModelForTextToSpectrogram", + "audio-classification": "AutoModelForAudioClassification", + "audio-frame-classification": "AutoModelForAudioFrameClassification", + # image processing + "mask-generation": "AutoModel", + "image-to-image": "AutoModelForImageToImage", + "masked-im": "AutoModelForMaskedImageModeling", + "object-detection": "AutoModelForObjectDetection", + "depth-estimation": "AutoModelForDepthEstimation", + "image-segmentation": "AutoModelForImageSegmentation", + "image-classification": "AutoModelForImageClassification", + "semantic-segmentation": "AutoModelForSemanticSegmentation", + "zero-shot-object-detection": "AutoModelForZeroShotObjectDetection", + "zero-shot-image-classification": "AutoModelForZeroShotImageClassification", + # text generation + "image-to-text": "AutoModelForVision2Seq", + "text-generation": "AutoModelForCausalLM", + "text2text-generation": "AutoModelForSeq2SeqLM", + "image-text-to-text": "AutoModelForImageTextToText", + "visual-question-answering": "AutoModelForVisualQuestionAnswering", + "automatic-speech-recognition": ("AutoModelForSpeechSeq2Seq", "AutoModelForCTC"), +} + +TASKS_TO_AUTO_PIPELINE_CLASS_NAMES = { + "inpainting": "AutoPipelineForInpainting", + "text-to-image": "AutoPipelineForText2Image", + "image-to-image": "AutoPipelineForImage2Image", } +TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES = {} + +if is_torch_available(): + import transformers + + for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): + TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES[task_name] = {} 
+ + if isinstance(auto_model_class_names, str): + auto_model_class_names = (auto_model_class_names,) + + for auto_model_class_name in auto_model_class_names: + auto_model_class = getattr(transformers, auto_model_class_name, None) + if auto_model_class is not None: + TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES[task_name].update( + auto_model_class._model_mapping._model_mapping + ) + + +TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = {} + +if is_diffusers_available(): + import diffusers + + if hasattr(diffusers, "pipelines") and hasattr(diffusers.pipelines, "auto_pipeline"): + from diffusers.pipelines.auto_pipeline import ( + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, + AUTO_INPAINT_PIPELINES_MAPPING, + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, + ) + + TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = { + "inpainting": AUTO_INPAINT_PIPELINES_MAPPING.copy(), + "text-to-image": AUTO_TEXT2IMAGE_PIPELINES_MAPPING.copy(), + "image-to-image": AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.copy(), + } + + for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): + for pipeline_type, pipeline_class in pipeline_mapping.items(): + TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES[task_name][pipeline_type] = pipeline_class.__name__ + else: + TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = {} + + IMAGE_DIFFUSION_TASKS = [ "inpainting", "text-to-image", @@ -56,15 +112,34 @@ "feature-extraction", ] +SYNONYM_TASKS = { + "masked-lm": "fill-mask", + "causal-lm": "text-generation", + "default": "feature-extraction", + "vision2seq-lm": "image-to-text", + "text-to-speech": "text-to-audio", + "seq2seq-lm": "text2text-generation", + "translation": "text2text-generation", + "summarization": "text2text-generation", + "mask-generation": "feature-extraction", + "audio-ctc": "automatic-speech-recognition", + "sentence-similarity": "feature-extraction", + "speech2seq-lm": "automatic-speech-recognition", + "sequence-classification": "text-classification", + "zero-shot-classification": "text-classification", +} + def map_from_synonym(task: str) -> str: - if task in _SYNONYM_TASK_MAP: - task = _SYNONYM_TASK_MAP[task] + if task in SYNONYM_TASKS: + task = SYNONYM_TASKS[task] return task def infer_library_from_model_name_or_path( - model_name_or_path: str, revision: Optional[str] = None, token: Optional[str] = None + model_name_or_path: str, + token: Optional[str] = None, + revision: Optional[str] = None, ) -> str: inferred_library_name = None @@ -77,6 +152,18 @@ def infer_library_from_model_name_or_path( repo_files = huggingface_hub.list_repo_files(model_name_or_path, revision=revision, token=token) if "model_index.json" in repo_files: inferred_library_name = "diffusers" + elif "config.json" in repo_files: + config_dict = json.loads( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="config.json", revision=revision, token=token + ) + ) + if "pretrained_cfg" in config_dict or "architecture" in config_dict: + inferred_library_name = "timm" + elif "_diffusers_version" in config_dict: + inferred_library_name = "diffusers" + else: + inferred_library_name = "transformers" if inferred_library_name is None: raise RuntimeError(f"Could not infer library name from repo {model_name_or_path}.") @@ -89,6 +176,7 @@ def infer_library_from_model_name_or_path( inferred_library_name = "diffusers" elif "config.json" in local_files: config_dict = json.load(open(os.path.join(model_name_or_path, "config.json"), "r")) + if "pretrained_cfg" in config_dict or "architecture" in config_dict: inferred_library_name = 
"timm" elif "_diffusers_version" in config_dict: @@ -129,14 +217,36 @@ def infer_task_from_model_name_or_path( elif library_name == "sentence-transformers": inferred_task_name = "feature-extraction" + elif huggingface_hub.repo_exists(model_name_or_path, token=token): + model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) + + if model_info.pipeline_tag is not None: + inferred_task_name = map_from_synonym(model_info.pipeline_tag) + + elif inferred_task_name is None: + if model_info.transformers_info is not None and model_info.transformersInfo.pipeline_tag is not None: + inferred_task_name = map_from_synonym(model_info.transformersInfo.pipeline_tag) + else: + target_auto_model = model_info.transformers_info["auto_model"] + for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): + if isinstance(auto_model_class_names, str): + auto_model_class_names = (auto_model_class_names,) + + for auto_model_class_name in auto_model_class_names: + if target_auto_model == auto_model_class_name: + inferred_task_name = task_name + break + if inferred_task_name is not None: + break + elif os.path.isdir(model_name_or_path): if library_name == "diffusers": diffusers_config = get_diffusers_pretrained_config(model_name_or_path, revision=revision, token=token) - class_name = diffusers_config["_class_name"] + target_class_name = diffusers_config["_class_name"] - for task_name, model_mapping in DIFFUSERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES.items(): - for model_type, model_class_name in model_mapping.items(): - if class_name == model_class_name: + for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): + for _, pipeline_class_name in pipeline_mapping.items(): + if target_class_name == pipeline_class_name: inferred_task_name = task_name break if inferred_task_name is not None: @@ -147,7 +257,7 @@ def infer_task_from_model_name_or_path( auto_modeling_module = importlib.import_module("transformers.models.auto.modeling_auto") model_type = transformers_config.model_type - for task_name, model_loaders in TRANSFORMERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES.items(): + for task_name, model_loaders in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items(): if isinstance(model_loaders, str): model_loaders = (model_loaders,) for model_loader in model_loaders: @@ -159,27 +269,6 @@ def infer_task_from_model_name_or_path( if inferred_task_name is not None: break - elif huggingface_hub.repo_exists(model_name_or_path, token=token): - model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) - - if model_info.pipeline_tag is not None: - inferred_task_name = map_from_synonym(model_info.pipeline_tag) - - elif inferred_task_name is None: - if model_info.transformers_info is not None and model_info.transformersInfo.pipeline_tag is not None: - inferred_task_name = map_from_synonym(model_info.transformersInfo.pipeline_tag) - else: - auto_model_class_name = model_info.transformers_info["auto_model"] - for task_name, model_loaders in TASKS_TO_MODEL_LOADERS.items(): - if isinstance(model_loaders, str): - model_loaders = (model_loaders,) - for model_loader in model_loaders: - if auto_model_class_name == model_loader: - inferred_task_name = task_name - break - if inferred_task_name is not None: - break - if inferred_task_name is None: raise KeyError(f"Could not find the proper task name for {auto_model_class_name}.") @@ -207,12 +296,12 @@ def infer_model_type_from_model_name_or_path( elif library_name == "diffusers": 
config = get_diffusers_pretrained_config(model_name_or_path, revision=revision, token=token) - class_name = config["_class_name"] + target_class_name = config["_class_name"] - for task_name, model_mapping in DIFFUSERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES.items(): - for model_type, model_class_name in model_mapping.items(): - if model_class_name == class_name: - inferred_model_type = model_type + for _, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): + for pipeline_type, pipeline_class_name in pipeline_mapping.items(): + if target_class_name == pipeline_class_name: + inferred_model_type = pipeline_type break if inferred_model_type is not None: break diff --git a/tests/configs/_st_bert_.yaml b/tests/configs/_st_bert_.yaml new file mode 100644 index 00000000..05ef4026 --- /dev/null +++ b/tests/configs/_st_bert_.yaml @@ -0,0 +1,3 @@ +backend: + model: sentence-transformers/all-MiniLM-L6-v2 + task: feature-extraction diff --git a/tests/configs/cpu_inference_py_txi_bert.yaml b/tests/configs/cpu_inference_py_txi_st_bert.yaml similarity index 77% rename from tests/configs/cpu_inference_py_txi_bert.yaml rename to tests/configs/cpu_inference_py_txi_st_bert.yaml index a575be99..2650e1bf 100644 --- a/tests/configs/cpu_inference_py_txi_bert.yaml +++ b/tests/configs/cpu_inference_py_txi_st_bert.yaml @@ -3,8 +3,8 @@ defaults: - _base_ # inherits from base config - _cpu_ # inherits from cpu config - _inference_ # inherits from inference config - - _bert_ # inherits from bert config + - _st_bert_ # inherits from bert config - _self_ # hydra 1.1 compatibility - override backend: py-txi -name: cpu_inference_py_txi_bert +name: cpu_inference_py_txi_st_bert diff --git a/tests/configs/cuda_inference_py_txi_bert.yaml b/tests/configs/cuda_inference_py_txi_st_bert.yaml similarity index 77% rename from tests/configs/cuda_inference_py_txi_bert.yaml rename to tests/configs/cuda_inference_py_txi_st_bert.yaml index 62405f30..8ae494e7 100644 --- a/tests/configs/cuda_inference_py_txi_bert.yaml +++ b/tests/configs/cuda_inference_py_txi_st_bert.yaml @@ -3,8 +3,8 @@ defaults: - _base_ # inherits from base config - _cuda_ # inherits from cuda config - _inference_ # inherits from inference config - - _bert_ # inherits from bert config + - _st_bert_ # inherits from bert config - _self_ # hydra 1.1 compatibility - override backend: py-txi -name: cuda_inference_py_txi_bert +name: cuda_inference_py_txi_st_bert diff --git a/tests/test_api.py b/tests/test_api.py index fd6e2dac..01851c34 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -22,8 +22,6 @@ from optimum_benchmark.generators.dataset_generator import DatasetGenerator from optimum_benchmark.generators.input_generator import InputGenerator from optimum_benchmark.import_utils import get_git_revision_hash -from optimum_benchmark.scenarios.inference.config import INPUT_SHAPES -from optimum_benchmark.scenarios.training.config import DATASET_SHAPES from optimum_benchmark.system_utils import is_nvidia_system, is_rocm_system from optimum_benchmark.trackers import LatencyTracker, MemoryTracker @@ -40,6 +38,18 @@ ("diffusers", "text-to-image", "CompVis/stable-diffusion-v1-4"), ] +INPUT_SHAPES = { + "batch_size": 2, # for all tasks + "sequence_length": 16, # for text processing tasks + "num_choices": 2, # for multiple-choice task +} + +DATASET_SHAPES = { + "dataset_size": 2, # for all tasks + "sequence_length": 16, # for text processing tasks + "num_choices": 2, # for multiple-choice task +} + @pytest.mark.parametrize("device", ["cpu", "cuda"]) 
@pytest.mark.parametrize("scenario", ["training", "inference"]) @@ -47,9 +57,6 @@ def test_api_launch(device, scenario, library, task, model): benchmark_name = f"{device}_{scenario}_{library}_{task}_{model}" - if task == "multiple-choice": - INPUT_SHAPES["num_choices"] = 2 - if device == "cuda": device_isolation = True if is_rocm_system(): @@ -173,9 +180,6 @@ def test_api_input_generator(library, task, model): else: raise ValueError(f"Unknown library {library}") - if task == "multiple-choice": - INPUT_SHAPES["num_choices"] = 2 - input_generator = InputGenerator( task=task, input_shapes=INPUT_SHAPES, From 00a4c21c20ae6c561abba473ef80d2413fe0448b Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 09:46:13 +0100 Subject: [PATCH 10/16] fix --- README.md | 1 + optimum_benchmark/backends/diffusers_utils.py | 14 +- .../backends/transformers_utils.py | 9 +- optimum_benchmark/task_utils.py | 128 +++++++++++++----- 4 files changed, 110 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 6f96561e..6358b341 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ Optimum-Benchmark is continuously and intensively tested on a variety of devices [![CLI_CUDA_TENSORRT_LLM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_tensorrt_llm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_tensorrt_llm.yaml) [![CLI_CUDA_TORCH_ORT](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_torch_ort.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_torch_ort.yaml) [![CLI_CUDA_VLLM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_vllm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_vllm.yaml) +[![CLI_ENERGY_STAR](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_energy_star.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_energy_star.yaml) [![CLI_MISC](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_misc.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_misc.yaml) [![CLI_ROCM_PYTORCH](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_pytorch.yaml) diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index ef1b4a59..4572be01 100644 --- a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -4,6 +4,7 @@ from hydra.utils import get_class from ..import_utils import is_diffusers_available +from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES, map_from_synonym if is_diffusers_available(): import diffusers @@ -11,14 +12,17 @@ def get_diffusers_auto_pipeline_class_for_task(task: str): - from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES - if not is_diffusers_available(): raise ImportError("diffusers is not available. 
Please, pip install diffusers.") - model_loader_name = TASKS_TO_AUTO_PIPELINE_CLASS_NAMES.get(task, None) - model_loader_class = getattr(diffusers, model_loader_name) - return model_loader_class + task = map_from_synonym(task) + + if task not in TASKS_TO_AUTO_PIPELINE_CLASS_NAMES: + raise ValueError(f"Task {task} not supported for diffusers") + + model_loader_name = TASKS_TO_AUTO_PIPELINE_CLASS_NAMES[task] + + return getattr(diffusers, model_loader_name) def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]: diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index 7226dd7c..c0234ba9 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -18,15 +18,14 @@ SpecialTokensMixin, ) +from ..task_utils import TASKS_TO_AUTO_MODEL_CLASS_NAMES, map_from_synonym -def get_transformers_auto_model_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: - from ..task_utils import SYNONYM_TASKS, TASKS_TO_AUTO_MODEL_CLASS_NAMES - if task in SYNONYM_TASKS: - task = SYNONYM_TASKS[task] +def get_transformers_auto_model_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: + task = map_from_synonym(task) if task not in TASKS_TO_AUTO_MODEL_CLASS_NAMES: - raise ValueError(f"Task {task} not supported") + raise ValueError(f"Task {task} not supported for transformers") if isinstance(TASKS_TO_AUTO_MODEL_CLASS_NAMES[task], str): return getattr(transformers, TASKS_TO_AUTO_MODEL_CLASS_NAMES[task]) diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index 1821b47d..7e82df36 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -5,10 +5,7 @@ import huggingface_hub -from .backends.diffusers_utils import get_diffusers_pretrained_config -from .backends.timm_utils import get_timm_pretrained_config -from .backends.transformers_utils import get_transformers_pretrained_config -from .import_utils import is_diffusers_available, is_torch_available +from .import_utils import is_diffusers_available, is_torch_available, is_transformers_available TASKS_TO_AUTO_MODEL_CLASS_NAMES = { # text processing @@ -51,7 +48,7 @@ TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES = {} -if is_torch_available(): +if is_transformers_available() and is_torch_available(): import transformers for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): @@ -88,9 +85,8 @@ for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): for pipeline_type, pipeline_class in pipeline_mapping.items(): + # diffusers does not have a mappings with just class names TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES[task_name][pipeline_type] = pipeline_class.__name__ - else: - TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = {} IMAGE_DIFFUSION_TASKS = [ @@ -133,13 +129,12 @@ def map_from_synonym(task: str) -> str: if task in SYNONYM_TASKS: task = SYNONYM_TASKS[task] + return task def infer_library_from_model_name_or_path( - model_name_or_path: str, - token: Optional[str] = None, - revision: Optional[str] = None, + model_name_or_path: str, token: Optional[str] = None, revision: Optional[str] = None ) -> str: inferred_library_name = None @@ -154,9 +149,12 @@ def infer_library_from_model_name_or_path( inferred_library_name = "diffusers" elif "config.json" in repo_files: config_dict = json.loads( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, 
filename="config.json", revision=revision, token=token - ) + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="config.json", revision=revision, token=token + ), + mode="r", + ).read() ) if "pretrained_cfg" in config_dict or "architecture" in config_dict: inferred_library_name = "timm" @@ -164,6 +162,8 @@ def infer_library_from_model_name_or_path( inferred_library_name = "diffusers" else: inferred_library_name = "transformers" + elif "onfig_sentence_transformers.json" in repo_files: + inferred_library_name = "sentence-transformers" if inferred_library_name is None: raise RuntimeError(f"Could not infer library name from repo {model_name_or_path}.") @@ -175,7 +175,12 @@ def infer_library_from_model_name_or_path( if "model_index.json" in local_files: inferred_library_name = "diffusers" elif "config.json" in local_files: - config_dict = json.load(open(os.path.join(model_name_or_path, "config.json"), "r")) + config_dict = json.load( + open( + os.path.join(model_name_or_path, "config.json"), + mode="r", + ) + ) if "pretrained_cfg" in config_dict or "architecture" in config_dict: inferred_library_name = "timm" @@ -183,6 +188,8 @@ def infer_library_from_model_name_or_path( inferred_library_name = "diffusers" else: inferred_library_name = "transformers" + elif "config_sentence_transformers.json" in local_files: + inferred_library_name = "sentence-transformers" if inferred_library_name is None: raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.") @@ -202,9 +209,9 @@ def infer_library_from_model_name_or_path( def infer_task_from_model_name_or_path( model_name_or_path: str, - library_name: Optional[str] = None, - revision: Optional[str] = None, token: Optional[str] = None, + revision: Optional[str] = None, + library_name: Optional[str] = None, ) -> str: if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) @@ -215,17 +222,17 @@ def infer_task_from_model_name_or_path( inferred_task_name = "image-classification" elif library_name == "sentence-transformers": - inferred_task_name = "feature-extraction" + inferred_task_name = "sentence-similarity" elif huggingface_hub.repo_exists(model_name_or_path, token=token): model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) if model_info.pipeline_tag is not None: - inferred_task_name = map_from_synonym(model_info.pipeline_tag) + inferred_task_name = model_info.pipeline_tag elif inferred_task_name is None: if model_info.transformers_info is not None and model_info.transformersInfo.pipeline_tag is not None: - inferred_task_name = map_from_synonym(model_info.transformersInfo.pipeline_tag) + inferred_task_name = model_info.transformersInfo.pipeline_tag else: target_auto_model = model_info.transformers_info["auto_model"] for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): @@ -241,7 +248,12 @@ def infer_task_from_model_name_or_path( elif os.path.isdir(model_name_or_path): if library_name == "diffusers": - diffusers_config = get_diffusers_pretrained_config(model_name_or_path, revision=revision, token=token) + diffusers_config = json.load( + open( + os.path.join(model_name_or_path, "model_index.json"), + mode="r", + ) + ) target_class_name = diffusers_config["_class_name"] for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): @@ -253,7 +265,12 @@ def infer_task_from_model_name_or_path( break elif library_name == 
"transformers": - transformers_config = get_transformers_pretrained_config(model_name_or_path, revision=revision, token=token) + transformers_config = json.load( + open( + os.path.join(model_name_or_path, "config.json"), + mode="r", + ) + ) auto_modeling_module = importlib.import_module("transformers.models.auto.modeling_auto") model_type = transformers_config.model_type @@ -272,15 +289,16 @@ def infer_task_from_model_name_or_path( if inferred_task_name is None: raise KeyError(f"Could not find the proper task name for {auto_model_class_name}.") + inferred_task_name = map_from_synonym(inferred_task_name) + return inferred_task_name def infer_model_type_from_model_name_or_path( model_name_or_path: str, - library_name: Optional[str] = None, - revision: Optional[str] = None, token: Optional[str] = None, - trust_remote_code: bool = False, + revision: Optional[str] = None, + library_name: Optional[str] = None, ) -> str: if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) @@ -291,12 +309,44 @@ def infer_model_type_from_model_name_or_path( inferred_model_type = "llama_cpp" elif library_name == "timm": - timm_config = get_timm_pretrained_config(model_name_or_path) - inferred_model_type = timm_config.architecture + if huggingface_hub.repo_exists(model_name_or_path, token=token): + timm_config = json.loads( + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="config.json", revision=revision, token=token + ), + mode="r", + ).read() + ) + else: + timm_config = json.load( + open( + os.path.join(model_name_or_path, "config.json"), + mode="r", + ) + ) + + inferred_model_type = timm_config["architecture"] elif library_name == "diffusers": - config = get_diffusers_pretrained_config(model_name_or_path, revision=revision, token=token) - target_class_name = config["_class_name"] + if huggingface_hub.repo_exists(model_name_or_path, token=token): + diffusers_config = json.loads( + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="model_index.json", revision=revision, token=token + ), + mode="r", + ).read() + ) + else: + diffusers_config = json.load( + open( + os.path.join(model_name_or_path, "model_index.json"), + mode="r", + ) + ) + + target_class_name = diffusers_config["_class_name"] for _, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): for pipeline_type, pipeline_class_name in pipeline_mapping.items(): @@ -307,10 +357,24 @@ def infer_model_type_from_model_name_or_path( break else: - transformers_config = get_transformers_pretrained_config( - model_name_or_path, revision=revision, token=token, trust_remote_code=trust_remote_code - ) - inferred_model_type = transformers_config.model_type + if huggingface_hub.repo_exists(model_name_or_path, token=token): + transformers_config = json.loads( + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="config.json", revision=revision, token=token + ), + mode="r", + ).read() + ) + else: + transformers_config = json.load( + open( + os.path.join(model_name_or_path, "config.json"), + mode="r", + ) + ) + + inferred_model_type = transformers_config["model_type"] if inferred_model_type is None: raise KeyError(f"Could not find the proper model type for {model_name_or_path}.") From 802975e05ea147528108d0c5d6e826376c7a1ee8 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 09:50:23 +0100 Subject: [PATCH 11/16] fix --- optimum_benchmark/backends/config.py | 11 
+++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/optimum_benchmark/backends/config.py b/optimum_benchmark/backends/config.py index cc4f6a24..fc265d4d 100644 --- a/optimum_benchmark/backends/config.py +++ b/optimum_benchmark/backends/config.py @@ -54,26 +54,25 @@ def __post_init__(self): # TODO: add cache_dir, token, etc. to these methods if self.library is None: self.library = infer_library_from_model_name_or_path( - self.model, + model_name_or_path=self.model, token=self.model_kwargs.get("token", None), revision=self.model_kwargs.get("revision", None), ) if self.task is None: self.task = infer_task_from_model_name_or_path( - self.model, - self.library, + model_name_or_path=self.model, token=self.model_kwargs.get("token", None), revision=self.model_kwargs.get("revision", None), + library_name=self.library, ) if self.model_type is None: self.model_type = infer_model_type_from_model_name_or_path( - self.model, - self.library, + model_name_or_path=self.model, token=self.model_kwargs.get("token", None), revision=self.model_kwargs.get("revision", None), - trust_remote_code=self.model_kwargs.get("trust_remote_code", False), + library_name=self.library, ) if self.device is None: From f9cf245fe6995df2c5f23bc77b92e835e634f9fd Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 10:30:31 +0100 Subject: [PATCH 12/16] better task utils that support local repos --- optimum_benchmark/task_utils.py | 274 +++++++++++--------------------- 1 file changed, 97 insertions(+), 177 deletions(-) diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index 7e82df36..cc9dff5c 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -1,4 +1,3 @@ -import importlib import json import os from typing import Optional @@ -133,74 +132,83 @@ def map_from_synonym(task: str) -> str: return task +def is_hf_hub_repo(model_name_or_path: str, token: Optional[str] = None) -> bool: + try: + return huggingface_hub.repo_exists(model_name_or_path, token=token) + except Exception: + return False + + +def is_local_dir_repo(model_name_or_path: str) -> bool: + return os.path.isdir(model_name_or_path) + + +def get_repo_config( + model_name_or_path: str, config_name: str, token: Optional[str] = None, revision: Optional[str] = None +): + if is_hf_hub_repo(model_name_or_path, token=token): + config = json.loads( + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, + filename=config_name, + revision=revision, + token=token, + ), + mode="r", + ) + ) + elif is_local_dir_repo(model_name_or_path): + config = json.load( + open( + os.path.join(model_name_or_path, config_name), + mode="r", + ) + ) + else: + raise KeyError(f"`{model_name_or_path}` is neither an hf hub repo nor a local directory.") + + return config + + +def get_repo_files(model_name_or_path: str, token: Optional[str] = None, revision: Optional[str] = None): + if is_hf_hub_repo(model_name_or_path, token=token): + repo_files = huggingface_hub.list_repo_files(model_name_or_path, revision=revision, token=token) + elif is_local_dir_repo(model_name_or_path): + repo_files = os.listdir(model_name_or_path) + else: + raise KeyError(f"`{model_name_or_path}` is neither an hf hub repo nor a local directory.") + + return repo_files + + def infer_library_from_model_name_or_path( - model_name_or_path: str, token: Optional[str] = None, revision: Optional[str] = None + model_name_or_path: str, + token: Optional[str] = None, + revision: Optional[str] = None, ) -> str: inferred_library_name 
= None - # if model_name_or_path is a repo - if huggingface_hub.repo_exists(model_name_or_path, token=token): - model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) - inferred_library_name = getattr(model_info, "library_name", None) - - if inferred_library_name is None: - repo_files = huggingface_hub.list_repo_files(model_name_or_path, revision=revision, token=token) - if "model_index.json" in repo_files: - inferred_library_name = "diffusers" - elif "config.json" in repo_files: - config_dict = json.loads( - open( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, filename="config.json", revision=revision, token=token - ), - mode="r", - ).read() - ) - if "pretrained_cfg" in config_dict or "architecture" in config_dict: - inferred_library_name = "timm" - elif "_diffusers_version" in config_dict: - inferred_library_name = "diffusers" - else: - inferred_library_name = "transformers" - elif "onfig_sentence_transformers.json" in repo_files: - inferred_library_name = "sentence-transformers" - - if inferred_library_name is None: - raise RuntimeError(f"Could not infer library name from repo {model_name_or_path}.") - - # if model_name_or_path is a directory - elif os.path.isdir(model_name_or_path): - local_files = os.listdir(model_name_or_path) - - if "model_index.json" in local_files: - inferred_library_name = "diffusers" - elif "config.json" in local_files: - config_dict = json.load( - open( - os.path.join(model_name_or_path, "config.json"), - mode="r", - ) - ) + repo_files = get_repo_files(model_name_or_path, token=token, revision=revision) - if "pretrained_cfg" in config_dict or "architecture" in config_dict: - inferred_library_name = "timm" - elif "_diffusers_version" in config_dict: - inferred_library_name = "diffusers" - else: - inferred_library_name = "transformers" - elif "config_sentence_transformers.json" in local_files: - inferred_library_name = "sentence-transformers" + if "model_index.json" in repo_files: + inferred_library_name = "diffusers" - if inferred_library_name is None: - raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.") + elif "config_sentence_transformers.json" in repo_files: + inferred_library_name = "sentence-transformers" - else: - raise KeyError( - f"Could not find the proper library name for {model_name_or_path}" - " because it's neither a repo nor a directory." 
- ) + elif "config.json" in repo_files: + config_dict = get_repo_config(model_name_or_path, token=token, revision=revision) - # for now, we still use transformers for sentence-transformers + if "pretrained_cfg" in config_dict: + inferred_library_name = "timm" + else: + inferred_library_name = "transformers" + + if inferred_library_name is None: + raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.") + + # for now, we use transformers for sentence-transformers models if inferred_library_name == "sentence-transformers": inferred_library_name = "transformers" @@ -213,85 +221,45 @@ def infer_task_from_model_name_or_path( revision: Optional[str] = None, library_name: Optional[str] = None, ) -> str: + inferred_task_name = None + if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) - inferred_task_name = None - if library_name == "timm": inferred_task_name = "image-classification" elif library_name == "sentence-transformers": inferred_task_name = "sentence-similarity" - elif huggingface_hub.repo_exists(model_name_or_path, token=token): - model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) - - if model_info.pipeline_tag is not None: - inferred_task_name = model_info.pipeline_tag - - elif inferred_task_name is None: - if model_info.transformers_info is not None and model_info.transformersInfo.pipeline_tag is not None: - inferred_task_name = model_info.transformersInfo.pipeline_tag - else: - target_auto_model = model_info.transformers_info["auto_model"] - for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): - if isinstance(auto_model_class_names, str): - auto_model_class_names = (auto_model_class_names,) - - for auto_model_class_name in auto_model_class_names: - if target_auto_model == auto_model_class_name: - inferred_task_name = task_name - break - if inferred_task_name is not None: - break - - elif os.path.isdir(model_name_or_path): - if library_name == "diffusers": - diffusers_config = json.load( - open( - os.path.join(model_name_or_path, "model_index.json"), - mode="r", - ) - ) - target_class_name = diffusers_config["_class_name"] - - for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): - for _, pipeline_class_name in pipeline_mapping.items(): - if target_class_name == pipeline_class_name: - inferred_task_name = task_name - break - if inferred_task_name is not None: + elif library_name == "diffusers": + diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision) + target_class_name = diffusers_config["_class_name"] + + for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): + for _, pipeline_class_name in pipeline_mapping.items(): + if target_class_name == pipeline_class_name: + inferred_task_name = task_name break + if inferred_task_name is not None: + break - elif library_name == "transformers": - transformers_config = json.load( - open( - os.path.join(model_name_or_path, "config.json"), - mode="r", - ) - ) - auto_modeling_module = importlib.import_module("transformers.models.auto.modeling_auto") - model_type = transformers_config.model_type - - for task_name, model_loaders in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items(): - if isinstance(model_loaders, str): - model_loaders = (model_loaders,) - for model_loader in model_loaders: - model_loader_class = getattr(auto_modeling_module, 
model_loader) - model_mapping = model_loader_class._model_mapping._model_mapping - if model_type in model_mapping: - inferred_task_name = task_name - break - if inferred_task_name is not None: + elif library_name == "transformers": + transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) + target_class_name = transformers_config["architectures"][0] + + for task_name, model_mapping in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items(): + for _, model_class_name in model_mapping.items(): + if target_class_name == model_class_name: + inferred_task_name = task_name break + if inferred_task_name is not None: + break if inferred_task_name is None: raise KeyError(f"Could not find the proper task name for {auto_model_class_name}.") - inferred_task_name = map_from_synonym(inferred_task_name) - - return inferred_task_name + return map_from_synonym(inferred_task_name) def infer_model_type_from_model_name_or_path( @@ -300,52 +268,20 @@ def infer_model_type_from_model_name_or_path( revision: Optional[str] = None, library_name: Optional[str] = None, ) -> str: + inferred_model_type = None + if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) - inferred_model_type = None - if library_name == "llama_cpp": inferred_model_type = "llama_cpp" elif library_name == "timm": - if huggingface_hub.repo_exists(model_name_or_path, token=token): - timm_config = json.loads( - open( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, filename="config.json", revision=revision, token=token - ), - mode="r", - ).read() - ) - else: - timm_config = json.load( - open( - os.path.join(model_name_or_path, "config.json"), - mode="r", - ) - ) - + timm_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) inferred_model_type = timm_config["architecture"] elif library_name == "diffusers": - if huggingface_hub.repo_exists(model_name_or_path, token=token): - diffusers_config = json.loads( - open( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, filename="model_index.json", revision=revision, token=token - ), - mode="r", - ).read() - ) - else: - diffusers_config = json.load( - open( - os.path.join(model_name_or_path, "model_index.json"), - mode="r", - ) - ) - + diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision) target_class_name = diffusers_config["_class_name"] for _, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): @@ -357,23 +293,7 @@ def infer_model_type_from_model_name_or_path( break else: - if huggingface_hub.repo_exists(model_name_or_path, token=token): - transformers_config = json.loads( - open( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, filename="config.json", revision=revision, token=token - ), - mode="r", - ).read() - ) - else: - transformers_config = json.load( - open( - os.path.join(model_name_or_path, "config.json"), - mode="r", - ) - ) - + transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) inferred_model_type = transformers_config["model_type"] if inferred_model_type is None: From 27b5d9a0250885a9dd01ad5bfecde8a1e24c6c5e Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 10:48:46 +0100 Subject: [PATCH 13/16] fix --- optimum_benchmark/task_utils.py | 36 +++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git 
a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index cc9dff5c..31d0cc77 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -124,14 +124,25 @@ "zero-shot-classification": "text-classification", } +SYNONYM_LIBRARIES = { + "sentence-transformers": "transformers", +} + -def map_from_synonym(task: str) -> str: +def map_from_synonym_task(task: str) -> str: if task in SYNONYM_TASKS: task = SYNONYM_TASKS[task] return task +def map_from_synonym_library(library: str) -> str: + if library in SYNONYM_LIBRARIES: + library = SYNONYM_LIBRARIES[library] + + return library + + def is_hf_hub_repo(model_name_or_path: str, token: Optional[str] = None) -> bool: try: return huggingface_hub.repo_exists(model_name_or_path, token=token) @@ -198,21 +209,20 @@ def infer_library_from_model_name_or_path( inferred_library_name = "sentence-transformers" elif "config.json" in repo_files: - config_dict = get_repo_config(model_name_or_path, token=token, revision=revision) + config_dict = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) if "pretrained_cfg" in config_dict: inferred_library_name = "timm" else: inferred_library_name = "transformers" + elif any(file.endswith(".gguf") or file.endswith(".GGUF") for file in repo_files): + inferred_library_name = "llama_cpp" + if inferred_library_name is None: raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.") - # for now, we use transformers for sentence-transformers models - if inferred_library_name == "sentence-transformers": - inferred_library_name = "transformers" - - return inferred_library_name + return map_from_synonym_library(inferred_library_name) def infer_task_from_model_name_or_path( @@ -226,11 +236,11 @@ def infer_task_from_model_name_or_path( if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) - if library_name == "timm": - inferred_task_name = "image-classification" + if library_name == "llama_cpp": + inferred_task_name = "text-generation" - elif library_name == "sentence-transformers": - inferred_task_name = "sentence-similarity" + elif library_name == "timm": + inferred_task_name = "image-classification" elif library_name == "diffusers": diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision) @@ -259,7 +269,7 @@ def infer_task_from_model_name_or_path( if inferred_task_name is None: raise KeyError(f"Could not find the proper task name for {auto_model_class_name}.") - return map_from_synonym(inferred_task_name) + return map_from_synonym_task(inferred_task_name) def infer_model_type_from_model_name_or_path( @@ -292,7 +302,7 @@ def infer_model_type_from_model_name_or_path( if inferred_model_type is not None: break - else: + elif library_name == "transformers": transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) inferred_model_type = transformers_config["model_type"] From f9b6e6f920fee2d668b7e2c5289b4332ac2d373d Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 10:49:41 +0100 Subject: [PATCH 14/16] fix --- optimum_benchmark/backends/diffusers_utils.py | 4 ++-- optimum_benchmark/backends/transformers_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index 4572be01..126d724d 100644 --- 
a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -4,7 +4,7 @@ from hydra.utils import get_class from ..import_utils import is_diffusers_available -from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES, map_from_synonym +from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES, map_from_synonym_task if is_diffusers_available(): import diffusers @@ -15,7 +15,7 @@ def get_diffusers_auto_pipeline_class_for_task(task: str): if not is_diffusers_available(): raise ImportError("diffusers is not available. Please, pip install diffusers.") - task = map_from_synonym(task) + task = map_from_synonym_task(task) if task not in TASKS_TO_AUTO_PIPELINE_CLASS_NAMES: raise ValueError(f"Task {task} not supported for diffusers") diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index c0234ba9..58feb7af 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -18,11 +18,11 @@ SpecialTokensMixin, ) -from ..task_utils import TASKS_TO_AUTO_MODEL_CLASS_NAMES, map_from_synonym +from ..task_utils import TASKS_TO_AUTO_MODEL_CLASS_NAMES, map_from_synonym_task def get_transformers_auto_model_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: - task = map_from_synonym(task) + task = map_from_synonym_task(task) if task not in TASKS_TO_AUTO_MODEL_CLASS_NAMES: raise ValueError(f"Task {task} not supported for transformers") From f85fea8e5d282dc63dcc30d5881a6bd296ea5f03 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 10:53:26 +0100 Subject: [PATCH 15/16] fix --- optimum_benchmark/task_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index 31d0cc77..c5b43739 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -158,7 +158,7 @@ def get_repo_config( model_name_or_path: str, config_name: str, token: Optional[str] = None, revision: Optional[str] = None ): if is_hf_hub_repo(model_name_or_path, token=token): - config = json.loads( + config = json.load( open( huggingface_hub.hf_hub_download( repo_id=model_name_or_path, From 7343ce83e8ae5eabbae9ef800bec6a2df2a9ebff Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 11:01:51 +0100 Subject: [PATCH 16/16] style --- optimum_benchmark/backends/diffusers_utils.py | 4 ++-- optimum_benchmark/backends/peft_utils.py | 3 ++- optimum_benchmark/backends/timm_utils.py | 8 ++------ optimum_benchmark/backends/transformers_utils.py | 6 +++--- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index 126d724d..345d30f0 100644 --- a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -12,11 +12,11 @@ def get_diffusers_auto_pipeline_class_for_task(task: str): + task = map_from_synonym_task(task) + if not is_diffusers_available(): raise ImportError("diffusers is not available. 
Please, pip install diffusers.") - task = map_from_synonym_task(task) - if task not in TASKS_TO_AUTO_PIPELINE_CLASS_NAMES: raise ValueError(f"Task {task} not supported for diffusers") diff --git a/optimum_benchmark/backends/peft_utils.py b/optimum_benchmark/backends/peft_utils.py index 92e71039..95b54b1e 100644 --- a/optimum_benchmark/backends/peft_utils.py +++ b/optimum_benchmark/backends/peft_utils.py @@ -8,9 +8,10 @@ from peft import PEFT_TYPE_TO_CONFIG_MAPPING, get_peft_model # type: ignore -def apply_peft(model: PreTrainedModel, peft_type: str, peft_config: Dict[str, Any]) -> PreTrainedModel: +def apply_peft(model: "PreTrainedModel", peft_type: str, peft_config: Dict[str, Any]) -> "PreTrainedModel": if not is_peft_available(): raise ImportError("peft is not available. Please, pip install peft.") peft_config = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type](**peft_config) + return get_peft_model(model=model, peft_config=peft_config) diff --git a/optimum_benchmark/backends/timm_utils.py b/optimum_benchmark/backends/timm_utils.py index 4cb3cd1c..7dc26e12 100644 --- a/optimum_benchmark/backends/timm_utils.py +++ b/optimum_benchmark/backends/timm_utils.py @@ -1,4 +1,3 @@ -import warnings from typing import Any, Dict from transformers import PretrainedConfig @@ -17,7 +16,7 @@ def get_timm_model_creator(): return create_model -def get_timm_pretrained_config(model_name: str) -> PretrainedConfig: +def get_timm_pretrained_config(model_name: str) -> "PretrainedConfig": if not is_timm_available(): raise ImportError("timm is not available. Please, pip install timm.") @@ -31,7 +30,7 @@ def get_timm_pretrained_config(model_name: str) -> PretrainedConfig: return get_pretrained_cfg(model_name) -def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]: +def extract_timm_shapes_from_config(config: "PretrainedConfig") -> Dict[str, Any]: if not is_timm_available(): raise ImportError("timm is not available. 
Please, pip install timm.") @@ -74,7 +73,4 @@ def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]: shapes["height"] = input_size[1] shapes["width"] = input_size[2] - if "num_classes" not in artifacts_dict: - warnings.warn("Could not extract shapes [num_channels, height, width] from timm model config.") - return shapes diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index 58feb7af..a623d43c 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -3,6 +3,7 @@ import torch import transformers +from torch import Tensor from transformers import ( AutoConfig, AutoFeatureExtractor, @@ -84,7 +85,7 @@ def get_flat_dict(d: Dict[str, Any]) -> Dict[str, Any]: return flat_dict -def get_flat_artifact_dict(artifact: Union[PretrainedConfig, PretrainedProcessor]) -> Dict[str, Any]: +def get_flat_artifact_dict(artifact: Union["PretrainedConfig", "PretrainedProcessor"]) -> Dict[str, Any]: artifact_dict = {} if isinstance(artifact, ProcessorMixin): @@ -175,7 +176,6 @@ def extract_transformers_shapes_from_artifacts( shapes["num_queries"] = flat_artifacts_dict["num_queries"] # image-text input - if "patch_size" in flat_artifacts_dict: shapes["patch_size"] = flat_artifacts_dict["patch_size"] if "in_chans" in flat_artifacts_dict: @@ -212,7 +212,7 @@ def extract_transformers_shapes_from_artifacts( } -def fast_random_tensor(tensor: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: +def fast_random_tensor(tensor: "Tensor", *args: Any, **kwargs: Any) -> "Tensor": return torch.nn.init.uniform_(tensor)