From 30a44728ab9fa6a29862039cc48e75913bd5b797 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 26 Nov 2024 15:17:41 +0100
Subject: [PATCH 01/16] fix llamacpp and windows libuv

---
 optimum_benchmark/launchers/torchrun/launcher.py | 10 ++++++----
 tests/configs/_gguf_.yaml | 4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py
index 98c076ee..b24c1c79 100644
--- a/optimum_benchmark/launchers/torchrun/launcher.py
+++ b/optimum_benchmark/launchers/torchrun/launcher.py
@@ -24,6 +24,10 @@ class TorchrunLauncher(Launcher[TorchrunConfig]):
     def __init__(self, config: TorchrunConfig):
         super().__init__(config)

+        if sys.platform == "win32":
+            self.logger.info("\t+ Disabline libuv on Windows")
+            os.environ["USE_LIBUV"] = "0"
+
         if get_start_method(allow_none=True) != self.config.start_method:
             self.logger.info(f"\t+ Setting multiprocessing start method to {self.config.start_method}")
             set_start_method(self.config.start_method, force=True)
@@ -101,7 +105,7 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])
             raise RuntimeError(f"Received an unexpected response from isolated process: {output}")

         self.logger.info("\t+ Aggregating reports from all rank processes")
-        report = BenchmarkReport.aggregate(reports)
+        report = BenchmarkReport.aggregate_across_processes(reports)

         return report

@@ -155,9 +159,7 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
     else:
         setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}")

-    if sys.platform == "win32":
-        logger.info("\t+ Disabline libuv on Windows")
-        os.environ["USE_LIBUV"] = "0"
+

     if torch.cuda.is_available():
         logger.info(f"\t+ Setting torch.distributed cuda device to {rank}")
diff --git a/tests/configs/_gguf_.yaml b/tests/configs/_gguf_.yaml
index 007a03e7..41ef8027 100644
--- a/tests/configs/_gguf_.yaml
+++ b/tests/configs/_gguf_.yaml
@@ -2,6 +2,6 @@ hydra:
   mode: MULTIRUN
   sweeper:
     params:
+      backend.model: ggml-org/models
      backend.task: text-generation,feature-extraction
-      backend.model: QuantFactory/gpt2-GGUF
-      backend.filename: gpt2.Q4_0.gguf
+      backend.filename: tinyllamas/stories15M-q8_0.gguf

From f4ee5b4b7e383259a9675b890de8320d8111c5d4 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 26 Nov 2024 15:19:54 +0100
Subject: [PATCH 02/16] fix

---
 optimum_benchmark/launchers/torchrun/launcher.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py
index b24c1c79..98eb4a37 100644
--- a/optimum_benchmark/launchers/torchrun/launcher.py
+++ b/optimum_benchmark/launchers/torchrun/launcher.py
@@ -105,7 +105,7 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])
             raise RuntimeError(f"Received an unexpected response from isolated process: {output}")

         self.logger.info("\t+ Aggregating reports from all rank processes")
-        report = BenchmarkReport.aggregate_across_processes(reports)
+        report = BenchmarkReport.aggregate(reports)

         return report

@@ -159,8 +159,6 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
     else:
         setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}")

-
-
     if torch.cuda.is_available():
         logger.info(f"\t+ Setting torch.distributed cuda device to {rank}")
         device = torch.device("cuda", rank)
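For context, the Windows workaround that these patches iterate on (the USE_LIBUV=0 environment variable in PATCH 01 above, an explicit init_method in PATCH 03 below, and finally skipping torchrun on Windows in PATCH 05) boils down to the sketch below. It is an illustration only, not part of the patch series, and it assumes a torchrun-managed environment where MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE are already set:

import sys

import torch.distributed

if sys.platform == "win32":
    # the libuv-backed TCPStore is the source of the Windows failures these patches work around;
    # the query parameter below is the init_method counterpart of setting USE_LIBUV=0 in the environment
    init_method = "env://?use_libuv=0"
else:
    init_method = "env://"

# env:// resolves the rendezvous from the environment variables exported by torchrun
torch.distributed.init_process_group(init_method=init_method)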
From 7c7d729f42fabf75d4a5aff54fa8f925954ac987 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 26 Nov 2024 15:34:06 +0100
Subject: [PATCH 03/16] fix

---
 optimum_benchmark/backends/base.py | 7 ++-----
 optimum_benchmark/backends/llama_cpp/backend.py | 9 ++-------
 optimum_benchmark/launchers/torchrun/launcher.py | 12 +++++++-----
 3 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py
index 6726f91f..1c039163 100644
--- a/optimum_benchmark/backends/base.py
+++ b/optimum_benchmark/backends/base.py
@@ -70,14 +70,11 @@ def __init__(self, config: BackendConfigT):

         elif self.config.library == "llama_cpp":
             self.logger.info("\t+ Benchmarking a LlamaCpp model")
-            # TOD: need a custom method to extract shapes from gguf
-            self.model_shapes = extract_transformers_shapes_from_artifacts(
-                self.pretrained_config, self.pretrained_processor
-            )
             self.pretrained_processor = None
-            self.generation_config = None
             self.pretrained_config = None
+            self.generation_config = None
             self.automodel_loader = None
+            self.model_shapes = {}

         else:
             self.logger.info("\t+ Benchmarking a Transformers model")
diff --git a/optimum_benchmark/backends/llama_cpp/backend.py b/optimum_benchmark/backends/llama_cpp/backend.py
index 06215cbf..c9d6bbf8 100644
--- a/optimum_benchmark/backends/llama_cpp/backend.py
+++ b/optimum_benchmark/backends/llama_cpp/backend.py
@@ -41,15 +41,10 @@ def llama_cpp_kwargs(self) -> Dict[str, Any]:
             "echo": False,
         }

-    def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]:
-        if self.config.task == "text-generation":
-            if input_shapes["batch_size"] != 1:
-                raise ValueError("Batch size must be 1 for LlamaCpp text generation")
-
-        return input_shapes
-
     def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
         if self.config.task == "text-generation":
+            if inputs["input_ids"].shape[0] != 1:
+                raise ValueError("Batch size must be 1 for LlamaCpp text generation")
             return {"tokens": inputs["input_ids"].squeeze(0).tolist()}

         elif self.config.task == "feature-extraction":
diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py
index 98eb4a37..768ea1c7 100644
--- a/optimum_benchmark/launchers/torchrun/launcher.py
+++ b/optimum_benchmark/launchers/torchrun/launcher.py
@@ -24,10 +24,6 @@ class TorchrunLauncher(Launcher[TorchrunConfig]):
     def __init__(self, config: TorchrunConfig):
         super().__init__(config)

-        if sys.platform == "win32":
-            self.logger.info("\t+ Disabline libuv on Windows")
-            os.environ["USE_LIBUV"] = "0"
-
         if get_start_method(allow_none=True) != self.config.start_method:
             self.logger.info(f"\t+ Setting multiprocessing start method to {self.config.start_method}")
             set_start_method(self.config.start_method, force=True)
@@ -164,8 +160,14 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
         device = torch.device("cuda", rank)
         torch.cuda.set_device(device)

+    if sys.platform == "win32":
+        logger.info("\t+ Disabling libuv for Windows")
+        init_method = "env://?use_libuv=0"
+    else:
+        init_method = "env://"
+
     logger.info("\t+ Initializing torch.distributed process group")
-    torch.distributed.init_process_group()
+    torch.distributed.init_process_group(init_method=init_method)

     try:
         report = worker(*worker_args)

From 2e97648d43157c7e746c403a230aa528ac8cdefa Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 26 Nov 2024 15:50:32 +0100
Subject: [PATCH 04/16] double

---
 optimum_benchmark/launchers/torchrun/launcher.py | 16 +++++++++-------
 1 file changed, 9
insertions(+), 7 deletions(-) diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index 768ea1c7..8a20a5c7 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -129,6 +129,10 @@ def target( else: raise RuntimeError("Could not synchronize with main process") + if sys.platform == "win32": + logger.info("\t+ Disabline libuv on Windows") + os.environ["USE_LIBUV"] = "0" + try: elastic_agent_launcher = elastic_launch(config=config, entrypoint=entrypoint) outputs = elastic_agent_launcher(worker, worker_args, logger) @@ -155,19 +159,17 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l else: setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}") + if sys.platform == "win32": + logger.info("\t+ Disabline libuv on Windows") + os.environ["USE_LIBUV"] = "0" + if torch.cuda.is_available(): logger.info(f"\t+ Setting torch.distributed cuda device to {rank}") device = torch.device("cuda", rank) torch.cuda.set_device(device) - if sys.platform == "win32": - logger.info("\t+ Disabling libuv for Windows") - init_method = "env://?use_libuv=0" - else: - init_method = "env://" - logger.info("\t+ Initializing torch.distributed process group") - torch.distributed.init_process_group(init_method=init_method) + torch.distributed.init_process_group() try: report = worker(*worker_args) From b7a28487172e0d6523b661d822bc0dfd3fe6adf6 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 26 Nov 2024 15:57:15 +0100 Subject: [PATCH 05/16] just skip torchrun on windows --- optimum_benchmark/launchers/torchrun/launcher.py | 8 -------- tests/test_cli.py | 6 ++++++ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index 8a20a5c7..ee49e295 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -129,10 +129,6 @@ def target( else: raise RuntimeError("Could not synchronize with main process") - if sys.platform == "win32": - logger.info("\t+ Disabline libuv on Windows") - os.environ["USE_LIBUV"] = "0" - try: elastic_agent_launcher = elastic_launch(config=config, entrypoint=entrypoint) outputs = elastic_agent_launcher(worker, worker_args, logger) @@ -159,10 +155,6 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l else: setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}") - if sys.platform == "win32": - logger.info("\t+ Disabline libuv on Windows") - os.environ["USE_LIBUV"] = "0" - if torch.cuda.is_available(): logger.info(f"\t+ Setting torch.distributed cuda device to {rank}") device = torch.device("cuda", rank) diff --git a/tests/test_cli.py b/tests/test_cli.py index c18b26fb..3a510806 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -53,6 +53,9 @@ def test_cli_configs(config_name): @pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"]) def test_cli_exit_code_0(launcher): + if launcher == "torchrun" and sys.platform == "win32": + pytest.skip("torchrun is not supported on Windows") + args_0 = [ "optimum-benchmark", "--config-dir", @@ -73,6 +76,9 @@ def test_cli_exit_code_0(launcher): @pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"]) def test_cli_exit_code_1(launcher): + if launcher == "torchrun" and sys.platform == "win32": + pytest.skip("torchrun is not supported 
on Windows") + args_1 = [ "optimum-benchmark", "--config-dir", From b64a5146ca832cd05e04131a4f52ddee9f5e389a Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Tue, 26 Nov 2024 16:01:06 +0100 Subject: [PATCH 06/16] style --- optimum_benchmark/backends/pytorch/backend.py | 2 +- optimum_benchmark/launchers/torchrun/launcher.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index fcf522b5..bfbe9745 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -24,7 +24,7 @@ from .config import PyTorchConfig if is_deepspeed_available(): - import deepspeed + import deepspeed # type: ignore if is_torch_distributed_available(): import torch.distributed diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index ee49e295..10b45d4d 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -1,5 +1,4 @@ import os -import sys import traceback from contextlib import ExitStack from logging import Logger From 13bc8c0733c2234e14d3aa753e0ed84e8e22e67d Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 27 Nov 2024 10:30:51 +0100 Subject: [PATCH 07/16] remove dp tp distinction --- optimum_benchmark/backends/ipex/backend.py | 21 +-- .../backends/onnxruntime/backend.py | 16 +- .../backends/openvino/backend.py | 49 +++--- optimum_benchmark/backends/pytorch/backend.py | 45 ++---- .../backends/transformers_utils.py | 43 +++--- optimum_benchmark/benchmark/report.py | 26 +++- .../generators/task_generator.py | 2 - .../launchers/torchrun/launcher.py | 2 +- .../scenarios/energy_star/scenario.py | 56 +++---- .../scenarios/inference/config.py | 1 - .../scenarios/inference/scenario.py | 141 ++++++++---------- optimum_benchmark/trackers/energy.py | 16 +- optimum_benchmark/trackers/latency.py | 16 +- optimum_benchmark/trackers/memory.py | 15 +- 14 files changed, 181 insertions(+), 268 deletions(-) diff --git a/optimum_benchmark/backends/ipex/backend.py b/optimum_benchmark/backends/ipex/backend.py index 8939fdb0..b584ff6c 100644 --- a/optimum_benchmark/backends/ipex/backend.py +++ b/optimum_benchmark/backends/ipex/backend.py @@ -84,31 +84,14 @@ def automodel_kwargs(self) -> Dict[str, Any]: if self.config.torch_dtype is not None: kwargs["torch_dtype"] = getattr(torch, self.config.torch_dtype) - print(kwargs) - return kwargs @property - def is_dp_distributed(self) -> bool: + def split_between_processes(self) -> bool: return is_torch_distributed_available() and torch.distributed.is_initialized() - def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: - if input_shapes["batch_size"] % torch.distributed.get_world_size() != 0: - raise ValueError( - f"Batch size {input_shapes['batch_size']} must be divisible by " - f"data parallel world size {torch.distributed.get_world_size()}" - ) - # distributing batch size across processes - input_shapes["batch_size"] //= torch.distributed.get_world_size() - - # registering input shapes for usage during model reshaping - self.input_shapes = input_shapes - - return input_shapes - def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: + if self.split_between_processes: with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: inputs = process_inputs diff --git 
a/optimum_benchmark/backends/onnxruntime/backend.py b/optimum_benchmark/backends/onnxruntime/backend.py index 223da6dc..2fffcc36 100644 --- a/optimum_benchmark/backends/onnxruntime/backend.py +++ b/optimum_benchmark/backends/onnxruntime/backend.py @@ -280,20 +280,12 @@ def quantize_onnx_files(self) -> None: if self.pretrained_config is not None: self.pretrained_config.save_pretrained(self.quantized_model) - def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: - if input_shapes["batch_size"] % torch.distributed.get_world_size() != 0: - raise ValueError( - f"Batch size {input_shapes['batch_size']} must be divisible by " - f"data parallel world size {torch.distributed.get_world_size()}" - ) - # distributing batch size across processes - input_shapes["batch_size"] //= torch.distributed.get_world_size() - - return input_shapes + @property + def split_between_processes(self) -> bool: + return is_torch_distributed_available() and torch.distributed.is_initialized() def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: + if self.split_between_processes: with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: inputs = process_inputs diff --git a/optimum_benchmark/backends/openvino/backend.py b/optimum_benchmark/backends/openvino/backend.py index 9db49fb2..f0aa1925 100644 --- a/optimum_benchmark/backends/openvino/backend.py +++ b/optimum_benchmark/backends/openvino/backend.py @@ -82,7 +82,7 @@ def load(self) -> None: if self.config.reshape: static_shapes = { key: value - for key, value in {**self.input_shapes, **self.model_shapes}.items() + for key, value in self.model_shapes.items() if key in inspect.getfullargspec(self.pretrained_model.reshape).args } if ("sequence_length" in static_shapes) and ("height" in static_shapes) and ("width" in static_shapes): @@ -135,20 +135,6 @@ def _load_ovmodel_with_no_weights(self) -> None: self.config.export = original_export self.config.model = original_model - @property - def is_dp_distributed(self) -> bool: - return is_torch_distributed_available() and torch.distributed.is_initialized() - - @property - def ovmodel_kwargs(self) -> Dict[str, Any]: - kwargs = {} - - if self.config.task in TEXT_GENERATION_TASKS: - kwargs["use_cache"] = self.config.use_cache - kwargs["use_merged"] = self.config.use_merged - - return kwargs - def quantize_automodel(self) -> None: self.logger.info("\t+ Attempting quantization") self.quantized_model = f"{self.tmpdir.name}/quantized_model" @@ -181,23 +167,22 @@ def quantize_automodel(self) -> None: batch_size=1, ) - def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: - if input_shapes["batch_size"] % torch.distributed.get_world_size() != 0: - raise ValueError( - f"Batch size {input_shapes['batch_size']} must be divisible by " - f"data parallel world size {torch.distributed.get_world_size()}" - ) - # distributing batch size across processes - input_shapes["batch_size"] //= torch.distributed.get_world_size() + @property + def ovmodel_kwargs(self) -> Dict[str, Any]: + kwargs = {} - # registering input shapes for usage during model reshaping - self.input_shapes = input_shapes + if self.config.task in TEXT_GENERATION_TASKS: + kwargs["use_cache"] = self.config.use_cache + kwargs["use_merged"] = self.config.use_merged - return input_shapes + return kwargs + + @property + def split_between_processes(self) -> bool: + return 
is_torch_distributed_available() and torch.distributed.is_initialized() def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: + if self.split_between_processes: with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: inputs = process_inputs @@ -205,6 +190,14 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names: inputs.pop(key) + if "input_ids" in inputs: + self.model_shapes.update(dict(zip(["batch_size", "sequence_length"], inputs["input_ids"].shape))) + + if "pixel_values" in inputs: + self.model_shapes.update( + dict(zip(["batch_size", "num_channels", "height", "width"], inputs["pixel_values"].shape)) + ) + return inputs def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index bfbe9745..5052e148 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -27,7 +27,7 @@ import deepspeed # type: ignore if is_torch_distributed_available(): - import torch.distributed + import torch.distributed # type: ignore if is_zentorch_available(): import zentorch # type: ignore # noqa: F401 @@ -326,18 +326,6 @@ def process_quantization_config(self) -> None: else: raise ValueError(f"Quantization scheme {self.config.quantization_scheme} not recognized") - @property - def is_distributed(self) -> bool: - return is_torch_distributed_available() and torch.distributed.is_initialized() - - @property - def is_tp_distributed(self) -> bool: - return self.is_distributed and self.config.deepspeed_inference - - @property - def is_dp_distributed(self) -> bool: - return self.is_distributed and not self.config.deepspeed_inference - @property def is_quantized(self) -> bool: return self.config.quantization_scheme is not None or ( @@ -407,35 +395,26 @@ def automodel_kwargs(self) -> Dict[str, Any]: return kwargs - def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: - if input_shapes["batch_size"] % torch.distributed.get_world_size() != 0: - raise ValueError( - f"Batch size {input_shapes['batch_size']} must be divisible by " - f"data parallel world size {torch.distributed.get_world_size()}" - ) - # distributing batch size across processes - input_shapes["batch_size"] //= torch.distributed.get_world_size() - - if self.is_tp_distributed: - if torch.distributed.get_rank() != 0: - # zeroing throughput on other ranks - input_shapes["batch_size"] = 0 - - return input_shapes + @property + def split_between_processes(self) -> bool: + return ( + is_torch_distributed_available() + and torch.distributed.is_initialized() + and not self.config.deepspeed_inference + ) def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if self.is_dp_distributed: + if self.split_between_processes: with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs: inputs = process_inputs - if self.config.library == "timm": - inputs = {"x": inputs["pixel_values"]} - for key, value in inputs.items(): if isinstance(value, torch.Tensor): inputs[key] = value.to(self.config.device) + if self.config.library == "timm": + inputs = {"x": inputs["pixel_values"]} + return inputs @torch.inference_mode() diff --git a/optimum_benchmark/backends/transformers_utils.py 
b/optimum_benchmark/backends/transformers_utils.py index 3b38bc2c..efd2b8af 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -1,5 +1,5 @@ from contextlib import contextmanager -from typing import Any, Dict, Optional, Union +from typing import Any, Dict, Optional, Type, Union import torch import transformers @@ -7,6 +7,7 @@ AutoConfig, AutoFeatureExtractor, AutoImageProcessor, + AutoModel, AutoProcessor, AutoTokenizer, FeatureExtractionMixin, @@ -17,9 +18,7 @@ SpecialTokensMixin, ) -from ..import_utils import is_torch_available - -TASKS_TO_MODEL_LOADERS = { +TASKS_TO_AUTOMODEL_CLASS_NAMES = { # text processing "feature-extraction": "AutoModel", "fill-mask": "AutoModelForMaskedLM", @@ -57,34 +56,26 @@ "sentence-similarity": "feature-extraction", } -if is_torch_available(): - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {} - for task_name, model_loaders in TASKS_TO_MODEL_LOADERS.items(): - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task_name] = {} - - if isinstance(model_loaders, str): - model_loaders = (model_loaders,) - - for model_loader_name in model_loaders: - model_loader_class = getattr(transformers, model_loader_name, None) - if model_loader_class is not None: - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task_name].update( - model_loader_class._model_mapping._model_mapping - ) -else: - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {} - -def get_transformers_automodel_loader_for_task(task: str, model_type: Optional[str] = None): +def get_transformers_automodel_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: if task in SYNONYM_TASKS: task = SYNONYM_TASKS[task] - if model_type is not None: - model_loader_name = TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task][model_type] + if task not in TASKS_TO_AUTOMODEL_CLASS_NAMES: + raise ValueError(f"Task {task} not supported") + + if isinstance(TASKS_TO_AUTOMODEL_CLASS_NAMES[task], str): + return getattr(transformers, TASKS_TO_AUTOMODEL_CLASS_NAMES[task]) else: - model_loader_name = TASKS_TO_MODEL_LOADERS[task] + if model_type is None: + raise ValueError(f"Task {task} requires a model_type to be specified") + + for automodel_class_name in TASKS_TO_AUTOMODEL_CLASS_NAMES[task]: + automodel_class = getattr(transformers, automodel_class_name) + if model_type in automodel_class._model_mapping._model_mapping: + return automodel_class - return getattr(transformers, model_loader_name) + raise ValueError(f"Task {task} not supported for model type {model_type}") PretrainedProcessor = Union["FeatureExtractionMixin", "ImageProcessingMixin", "SpecialTokensMixin", "ProcessorMixin"] diff --git a/optimum_benchmark/benchmark/report.py b/optimum_benchmark/benchmark/report.py index c4b0602d..b9edd960 100644 --- a/optimum_benchmark/benchmark/report.py +++ b/optimum_benchmark/benchmark/report.py @@ -35,16 +35,26 @@ def __post_init__(self): self.efficiency = Efficiency(**self.efficiency) @staticmethod - def aggregate(measurements: List["TargetMeasurements"]) -> "TargetMeasurements": + def aggregate_across_processes(measurements: List["TargetMeasurements"]) -> "TargetMeasurements": assert len(measurements) > 0, "No measurements to aggregate" m0 = measurements[0] - memory = Memory.aggregate([m.memory for m in measurements]) if m0.memory is not None else None - latency = Latency.aggregate([m.latency for m in measurements]) if m0.latency is not None else None - throughput = Throughput.aggregate([m.throughput for m in measurements]) if m0.throughput is not None else None - energy = 
Energy.aggregate([m.energy for m in measurements]) if m0.energy is not None else None - efficiency = Efficiency.aggregate([m.efficiency for m in measurements]) if m0.efficiency is not None else None + memory = Memory.aggregate_across_processes([m.memory for m in measurements]) if m0.memory is not None else None + latency = ( + Latency.aggregate_across_processes([m.latency for m in measurements]) if m0.latency is not None else None + ) + throughput = ( + Throughput.aggregate_across_processes([m.throughput for m in measurements]) + if m0.throughput is not None + else None + ) + energy = Energy.aggregate_across_processes([m.energy for m in measurements]) if m0.energy is not None else None + efficiency = ( + Efficiency.aggregate_across_processes([m.efficiency for m in measurements]) + if m0.efficiency is not None + else None + ) return TargetMeasurements( memory=memory, latency=latency, throughput=throughput, energy=energy, efficiency=efficiency @@ -99,11 +109,11 @@ def __post_init__(self): setattr(self, target, TargetMeasurements(**getattr(self, target))) @classmethod - def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport": + def aggregate_across_processes(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport": aggregated_measurements = {} for target in reports[0].to_dict().keys(): measurements = [getattr(report, target) for report in reports] - aggregated_measurements[target] = TargetMeasurements.aggregate(measurements) + aggregated_measurements[target] = TargetMeasurements.aggregate_across_processes(measurements) return cls.from_dict(aggregated_measurements) diff --git a/optimum_benchmark/generators/task_generator.py b/optimum_benchmark/generators/task_generator.py index f11d21eb..96dbb2e5 100644 --- a/optimum_benchmark/generators/task_generator.py +++ b/optimum_benchmark/generators/task_generator.py @@ -445,6 +445,4 @@ def __call__(self): "image-text-to-text": ImageTextToTextGenerator, # diffusers pipelines tasks "text-to-image": PromptGenerator, - "stable-diffusion": PromptGenerator, - "stable-diffusion-xl": PromptGenerator, } diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index 10b45d4d..99d5ba12 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -100,7 +100,7 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) raise RuntimeError(f"Received an unexpected response from isolated process: {output}") self.logger.info("\t+ Aggregating reports from all rank processes") - report = BenchmarkReport.aggregate(reports) + report = BenchmarkReport.aggregate_across_processes(reports) return report diff --git a/optimum_benchmark/scenarios/energy_star/scenario.py b/optimum_benchmark/scenarios/energy_star/scenario.py index 39b12a04..8345cae0 100644 --- a/optimum_benchmark/scenarios/energy_star/scenario.py +++ b/optimum_benchmark/scenarios/energy_star/scenario.py @@ -38,7 +38,7 @@ PREPROCESS_EFFICIENCY_UNIT = "samples/kWh" FORWARD_EFFICIENCY_UNIT = "samples/kWh" -PREFILL_EFFICIENCY_UNIT = "tokens/kWh" +PREFILL_EFFICIENCY_UNIT = "samples/kWh" DECODE_EFFICIENCY_UNIT = "tokens/kWh" CALL_EFFICIENCY_UNIT = "images/kWh" @@ -50,9 +50,9 @@ def __init__(self, config: EnergyStarConfig) -> None: super().__init__(config) def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: - self.task = backend.config.task + self.backend = backend - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in 
TEXT_GENERATION_TASKS: self.logger.info("\t+ Updating Text Generation kwargs with default values") self.config.generate_kwargs = {**TEXT_GENERATION_DEFAULT_KWARGS, **self.config.generate_kwargs} self.prefill_kwargs = {**self.config.generate_kwargs, **TEXT_GENERATION_PREFILL_OVERRIDES} @@ -60,7 +60,7 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: self.report = BenchmarkReport.from_list( targets=["load_dataset", "preprocess_dataset", "load_model", "prefill", "decode"] ) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.logger.info("\t+ Updating Image Diffusion kwargs with default values") self.config.call_kwargs = {**IMAGE_DIFFUSION_DEFAULT_KWARGS, **self.config.call_kwargs} self.logger.info("\t+ Initializing Image Diffusion report") @@ -80,17 +80,18 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: ) self.run_dataset_loading_energy_tracking() - self.run_model_loading_energy_tracking(backend) self.run_dataset_preprocessing_energy_tracking(backend) self.logger.info("\t+ Preparing sample inputs for model warmup") - self.raw_sample_inputs = self.dataset[: self.config.input_shapes["batch_size"]] - self.prepared_sample_inputs = backend.prepare_inputs(self.raw_sample_inputs) + self.sample_inputs = self.dataset[: self.config.input_shapes["batch_size"]] + self.sample_inputs = backend.prepare_inputs(self.sample_inputs) + + self.run_model_loading_energy_tracking(backend) - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: self.warmup_text_generation(backend) self.run_text_generation_energy_tracking(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.warmup_image_diffusion(backend) self.run_image_diffusion_energy_tracking(backend) else: @@ -115,7 +116,7 @@ def run_dataset_preprocessing_energy_tracking(self, backend: Backend[BackendConf self.logger.info("\t+ Running dataset preprocessing energy tracking") with self.energy_tracker.track(file_prefix="preprocess_dataset"): - self.dataset = TASKS_TO_PREPROCESSORS[self.task]( + self.dataset = TASKS_TO_PREPROCESSORS[self.backend.config.task]( dataset=self.dataset, scenario_config=self.config, pretrained_config=backend.pretrained_config, @@ -144,24 +145,22 @@ def run_model_loading_energy_tracking(self, backend: Backend[BackendConfigT]): # Text Generation warmup def warmup_text_generation(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Warming up backend for Text Generation") - backend.generate(self.prepared_sample_inputs, self.config.generate_kwargs) + backend.generate(self.sample_inputs, self.config.generate_kwargs) for _ in range(self.config.warmup_runs): - backend.generate( - self.prepared_sample_inputs, {**self.config.generate_kwargs, **TEXT_GENERATION_WARMUP_OVERRIDES} - ) + backend.generate(self.sample_inputs, {**self.config.generate_kwargs, **TEXT_GENERATION_WARMUP_OVERRIDES}) # Image Diffusion warmup def warmup_image_diffusion(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Warming up backend for Image Diffusion") - backend.call(self.prepared_sample_inputs, self.config.call_kwargs) + backend.call(self.sample_inputs, self.config.call_kwargs) for _ in range(self.config.warmup_runs): - backend.call(self.prepared_sample_inputs, {**self.config.call_kwargs, **IMAGE_DIFFUSION_WARMUP_OVERRIDES}) + backend.call(self.sample_inputs, {**self.config.call_kwargs, **IMAGE_DIFFUSION_WARMUP_OVERRIDES}) # Inference warmup def 
warmup_inference(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Warming up backend for Inference") for _ in range(self.config.warmup_runs): - backend.forward(self.prepared_sample_inputs, self.config.forward_kwargs) + backend.forward(self.sample_inputs, self.config.forward_kwargs) # Text Generation energy tracking def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]): @@ -243,25 +242,8 @@ def dataset_forward_volume(self) -> int: # in samples return self.config.num_samples @property - def dataset_prefill_volume(self) -> int: # in tokens - prefill_volume = 0 - - for sample in self.dataset: - if "input_ids" in sample.keys(): - # text/image-text/video-image-text conditioned generation - prefill_volume += self.raw_sample_inputs["input_ids"].numel() - else: - # image/audio/other conditioned generation (1 bos token) - prefill_volume += 1 - - return prefill_volume - - @property - def dataset_per_token_volume(self) -> int: # in tokens - return ( - self.config.num_samples - * self.config.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated - ) + def dataset_prefill_volume(self) -> int: # in samples + return self.config.num_samples @property def dataset_decode_volume(self) -> int: # in tokens @@ -273,7 +255,7 @@ def dataset_decode_volume(self) -> int: # in tokens @property def dataset_call_volume(self) -> int: # in images - if self.task == "text-to-image": + if self.backend.config.task == "text-to-image": return self.config.num_samples * self.config.call_kwargs["num_images_per_prompt"] else: return self.config.num_samples diff --git a/optimum_benchmark/scenarios/inference/config.py b/optimum_benchmark/scenarios/inference/config.py index 57d482ab..d86962eb 100644 --- a/optimum_benchmark/scenarios/inference/config.py +++ b/optimum_benchmark/scenarios/inference/config.py @@ -9,7 +9,6 @@ INPUT_SHAPES = { "batch_size": 2, - "sequence_length": 16, } diff --git a/optimum_benchmark/scenarios/inference/scenario.py b/optimum_benchmark/scenarios/inference/scenario.py index 2f0ac8e7..e05cb7b9 100644 --- a/optimum_benchmark/scenarios/inference/scenario.py +++ b/optimum_benchmark/scenarios/inference/scenario.py @@ -40,13 +40,17 @@ "num_inference_steps": 2, } -TEXT_GENERATION_THROUGHPUT_UNIT = "tokens/s" -IMAGE_DIFFUSION_THROUGHPUT_UNIT = "images/s" -INFERENCE_THROUGHPUT_UNIT = "samples/s" -TEXT_GENERATION_EFFICIENCY_UNIT = "tokens/kWh" -IMAGE_DIFFUSION_EFFICIENCY_UNIT = "images/kWh" -INFERENCE_EFFICIENCY_UNIT = "samples/kWh" +FORWARD_THROUGHPUT_UNIT = "samples/s" +PREFILL_THROUGHPUT_UNIT = "samples/s" +DECODE_THROUGHPUT_UNIT = "tokens/s" +CALL_THROUGHPUT_UNIT = "images/s" + + +FORWARD_EFFICIENCY_UNIT = "samples/kWh" +PREFILL_EFFICIENCY_UNIT = "samples/kWh" +DECODE_EFFICIENCY_UNIT = "tokens/kWh" +CALL_EFFICIENCY_UNIT = "images/kWh" class InferenceScenario(Scenario[InferenceConfig]): @@ -56,77 +60,71 @@ def __init__(self, config: InferenceConfig) -> None: super().__init__(config) def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: - self.task = backend.config.task + self.backend = backend - self.logger.info("\t+ Creating input generator") - self.input_generator = InputGenerator( - task=self.task, - input_shapes=self.config.input_shapes, - model_shapes=backend.model_shapes, - model_type=backend.config.model_type, - ) - - if self.task in TEXT_GENERATION_TASKS: - self.logger.info("\t+ Generating Text Generation inputs") - self.inputs = self.input_generator() + if self.backend.config.task in TEXT_GENERATION_TASKS: 
self.logger.info("\t+ Updating Text Generation kwargs with default values") self.config.generate_kwargs = {**TEXT_GENERATION_DEFAULT_KWARGS, **self.config.generate_kwargs} self.logger.info("\t+ Initializing Text Generation report") self.report = BenchmarkReport.from_list(targets=["load", "prefill", "decode", "per_token"]) - elif self.task in IMAGE_DIFFUSION_TASKS: - self.logger.info("\t+ Generating Image Diffusion inputs") - self.inputs = self.input_generator() + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.logger.info("\t+ Updating Image Diffusion kwargs with default values") self.config.call_kwargs = {**IMAGE_DIFFUSION_DEFAULT_KWARGS, **self.config.call_kwargs} self.logger.info("\t+ Initializing Image Diffusion report") self.report = BenchmarkReport.from_list(targets=["load", "call"]) else: - self.logger.info("\t+ Generating Inference inputs") - self.inputs = self.input_generator() self.logger.info("\t+ Initializing Inference report") self.report = BenchmarkReport.from_list(targets=["load", "forward"]) - self.logger.info("\t+ Preparing input shapes for Inference") - self.config.input_shapes = backend.prepare_input_shapes(input_shapes=self.config.input_shapes) + self.logger.info("\t+ Creating input generator") + self.input_generator = InputGenerator( + task=self.backend.config.task, + model_shapes=backend.model_shapes, + input_shapes=self.config.input_shapes, + model_type=backend.config.model_type, + ) - self.run_model_loading_tracking(backend) + self.logger.info("\t+ Generating inputs") + self.inputs = self.input_generator() self.logger.info("\t+ Preparing inputs for Inference") self.inputs = backend.prepare_inputs(inputs=self.inputs) + self.run_model_loading_tracking(backend) + if self.config.latency or self.config.energy: # latency and energy are metrics that require some warmup if self.config.warmup_runs > 0: - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: self.warmup_text_generation(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.warmup_image_diffusion(backend) else: self.warmup_inference(backend) if self.config.latency: - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: if backend.config.name in PER_TOKEN_BACKENDS: self.run_per_token_text_generation_latency_tracking(backend) else: self.run_text_generation_latency_tracking(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.run_image_diffusion_latency_tracking(backend) else: self.run_latency_inference_tracking(backend) if self.config.memory: - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: self.run_text_generation_memory_tracking(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.run_image_diffusion_memory_tracking(backend) else: self.run_inference_memory_tracking(backend) if self.config.energy: - if self.task in TEXT_GENERATION_TASKS: + if self.backend.config.task in TEXT_GENERATION_TASKS: self.run_text_generation_energy_tracking(backend) - elif self.task in IMAGE_DIFFUSION_TASKS: + elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.run_image_diffusion_energy_tracking(backend) else: self.run_inference_energy_tracking(backend) @@ -178,42 +176,42 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]): ## Memory tracking def 
run_text_generation_memory_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running Text Generation memory tracking") - self.memory_tracker = MemoryTracker( + memory_tracker = MemoryTracker( backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids ) prefill_kwargs = {**self.config.generate_kwargs, **TEXT_GENERATION_PREFILL_OVERRIDES} - with self.memory_tracker.track(): + with memory_tracker.track(): _ = backend.prefill(self.inputs, prefill_kwargs) - self.report.prefill.memory = self.memory_tracker.get_max_memory() + self.report.prefill.memory = memory_tracker.get_max_memory() - with self.memory_tracker.track(): + with memory_tracker.track(): _ = backend.generate(self.inputs, self.config.generate_kwargs) - self.report.decode.memory = self.memory_tracker.get_max_memory() + self.report.decode.memory = memory_tracker.get_max_memory() def run_image_diffusion_memory_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running Image Diffusion memory tracking") - self.memory_tracker = MemoryTracker( + memory_tracker = MemoryTracker( backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids ) - with self.memory_tracker.track(): + with memory_tracker.track(): _ = backend.call(self.inputs, self.config.call_kwargs) - self.report.call.memory = self.memory_tracker.get_max_memory() + self.report.call.memory = memory_tracker.get_max_memory() def run_inference_memory_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running Inference memory tracking") - self.memory_tracker = MemoryTracker( + memory_tracker = MemoryTracker( backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids ) - with self.memory_tracker.track(): + with memory_tracker.track(): _ = backend.forward(self.inputs, self.config.forward_kwargs) - self.report.forward.memory = self.memory_tracker.get_max_memory() + self.report.forward.memory = memory_tracker.get_max_memory() ## Latency tracking def run_per_token_text_generation_latency_tracking(self, backend: Backend[BackendConfigT]): @@ -229,7 +227,6 @@ def run_per_token_text_generation_latency_tracking(self, backend: Backend[Backen prefill_latency = latency_tracker.get_prefill_latency() decode_latency = latency_tracker.get_decode_latency() - per_token_volume = self.atomic_per_token_volume prefill_volume = self.atomic_prefill_volume decode_volume = self.atomic_decode_volume @@ -237,14 +234,12 @@ def run_per_token_text_generation_latency_tracking(self, backend: Backend[Backen self.report.prefill.latency = prefill_latency self.report.decode.latency = decode_latency - self.report.per_token.throughput = Throughput.from_latency( - per_token_latency, per_token_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT - ) + # we don't register a per-token throughput, as it's a confusing metric and the same as the decode throughput self.report.prefill.throughput = Throughput.from_latency( - prefill_latency, prefill_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT + prefill_latency, prefill_volume, unit=PREFILL_THROUGHPUT_UNIT ) self.report.decode.throughput = Throughput.from_latency( - decode_latency, decode_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT + decode_latency, decode_volume, unit=DECODE_THROUGHPUT_UNIT ) def run_text_generation_latency_tracking(self, backend: Backend[BackendConfigT]): @@ -261,7 +256,7 @@ def run_text_generation_latency_tracking(self, backend: Backend[BackendConfigT]) self.report.prefill.latency = prefill_latency 
self.report.prefill.throughput = Throughput.from_latency( - prefill_latency, prefill_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT + prefill_latency, prefill_volume, unit=PREFILL_THROUGHPUT_UNIT ) latency_tracker.reset() @@ -275,7 +270,7 @@ def run_text_generation_latency_tracking(self, backend: Backend[BackendConfigT]) self.report.decode.latency = decode_latency self.report.decode.throughput = Throughput.from_latency( - decode_latency, decode_volume, unit=TEXT_GENERATION_THROUGHPUT_UNIT + decode_latency, decode_volume, unit=DECODE_THROUGHPUT_UNIT ) def run_image_diffusion_latency_tracking(self, backend: Backend[BackendConfigT]): @@ -290,9 +285,7 @@ def run_image_diffusion_latency_tracking(self, backend: Backend[BackendConfigT]) call_volume = self.atomic_call_volume self.report.call.latency = call_latency - self.report.call.throughput = Throughput.from_latency( - call_latency, call_volume, unit=IMAGE_DIFFUSION_THROUGHPUT_UNIT - ) + self.report.call.throughput = Throughput.from_latency(call_latency, call_volume, unit=CALL_THROUGHPUT_UNIT) def run_latency_inference_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running Inference latency tracking") @@ -307,7 +300,7 @@ def run_latency_inference_tracking(self, backend: Backend[BackendConfigT]): self.report.forward.latency = forward_latency self.report.forward.throughput = Throughput.from_latency( - forward_latency, forward_volume, unit=INFERENCE_THROUGHPUT_UNIT + forward_latency, forward_volume, unit=FORWARD_THROUGHPUT_UNIT ) ## Energy tracking @@ -333,7 +326,7 @@ def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.prefill.energy = prefill_energy self.report.prefill.efficiency = Efficiency.from_energy( - prefill_energy, prefill_volume, unit=TEXT_GENERATION_EFFICIENCY_UNIT + prefill_energy, prefill_volume, unit=PREFILL_EFFICIENCY_UNIT ) count = 0 @@ -352,7 +345,7 @@ def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.decode.energy = decode_energy self.report.decode.efficiency = Efficiency.from_energy( - decode_energy, decode_volume, unit=TEXT_GENERATION_EFFICIENCY_UNIT + decode_energy, decode_volume, unit=DECODE_EFFICIENCY_UNIT ) def run_image_diffusion_energy_tracking(self, backend: Backend[BackendConfigT]): @@ -375,9 +368,7 @@ def run_image_diffusion_energy_tracking(self, backend: Backend[BackendConfigT]): call_volume = self.atomic_call_volume self.report.call.energy = call_energy - self.report.call.efficiency = Efficiency.from_energy( - call_energy, call_volume, unit=IMAGE_DIFFUSION_EFFICIENCY_UNIT - ) + self.report.call.efficiency = Efficiency.from_energy(call_energy, call_volume, unit=CALL_EFFICIENCY_UNIT) def run_inference_energy_tracking(self, backend: Backend[BackendConfigT]): self.logger.info("\t+ Running energy tracking") @@ -400,31 +391,19 @@ def run_inference_energy_tracking(self, backend: Backend[BackendConfigT]): self.report.forward.energy = forward_energy self.report.forward.efficiency = Efficiency.from_energy( - forward_energy, forward_volume, unit=INFERENCE_EFFICIENCY_UNIT + forward_energy, forward_volume, unit=FORWARD_EFFICIENCY_UNIT ) @property - def atomic_forward_volume(self) -> int: # in samples + def atomic_forward_volume(self) -> int: # in terms of processed samples return self.config.input_shapes["batch_size"] @property - def atomic_prefill_volume(self) -> int: # in tokens - if {"input_ids", "prompt", "prompts"} & set(self.inputs.keys()): - # text conditioned generation (sequence_length tokens) - return 
self.config.input_shapes["batch_size"] * self.config.input_shapes["sequence_length"] - else: - # image/audio conditioned generation (1 bos token) - return self.config.input_shapes["batch_size"] - - @property - def atomic_per_token_volume(self) -> int: # in tokens - return ( - self.config.input_shapes["batch_size"] - * self.config.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated - ) + def atomic_prefill_volume(self) -> int: # in terms of processed samples + return self.config.input_shapes["batch_size"] @property - def atomic_decode_volume(self) -> int: # in tokens + def atomic_decode_volume(self) -> int: # in terms of output/generated tokens return ( self.config.input_shapes["batch_size"] * self.config.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated @@ -432,8 +411,8 @@ def atomic_decode_volume(self) -> int: # in tokens ) @property - def atomic_call_volume(self) -> int: # in images - if self.task == "text-to-image": + def atomic_call_volume(self) -> int: # in terms of output images + if self.backend.config.task == "text-to-image": return self.config.input_shapes["batch_size"] * self.config.call_kwargs["num_images_per_prompt"] else: return self.config.input_shapes["batch_size"] diff --git a/optimum_benchmark/trackers/energy.py b/optimum_benchmark/trackers/energy.py index 3586809f..427c4d40 100644 --- a/optimum_benchmark/trackers/energy.py +++ b/optimum_benchmark/trackers/energy.py @@ -61,19 +61,20 @@ def __truediv__(self, scalar: float) -> "Energy": ) @staticmethod - def aggregate(energies: List["Energy"]) -> "Energy": - if len(energies) == 0 or all(energy is None for energy in energies): - return None + def aggregate_across_processes(energies: List[Optional["Energy"]]) -> Optional["Energy"]: + if len(energies) == 0: + raise ValueError("No energy measurements to aggregate") elif any(energy is None for energy in energies): raise ValueError("Some energy measurements are missing") # since measurements are machine-level, we just take the average + total = sum(energy.total for energy in energies) / len(energies) cpu = sum(energy.cpu for energy in energies) / len(energies) gpu = sum(energy.gpu for energy in energies) / len(energies) ram = sum(energy.ram for energy in energies) / len(energies) - total = sum(energy.total for energy in energies) / len(energies) + unit = energies[0].unit - return Energy(cpu=cpu, gpu=gpu, ram=ram, total=total, unit=ENERGY_UNIT) + return Energy(cpu=cpu, gpu=gpu, ram=ram, total=total, unit=unit) def to_plain_text(self) -> str: plain_text = "" @@ -109,14 +110,15 @@ class Efficiency: value: float @staticmethod - def aggregate(efficiencies: List["Efficiency"]) -> "Efficiency": + def aggregate_across_processes(efficiencies: List[Optional["Efficiency"]]) -> Optional["Efficiency"]: if len(efficiencies) == 0: raise ValueError("No efficiency measurements to aggregate") elif any(efficiency is None for efficiency in efficiencies): raise ValueError("Some efficiency measurements are None") - unit = efficiencies[0].unit + # since measurements are machine-level, we just take the average value = sum(efficiency.value for efficiency in efficiencies) / len(efficiencies) + unit = efficiencies[0].unit return Efficiency(value=value, unit=unit) diff --git a/optimum_benchmark/trackers/latency.py b/optimum_benchmark/trackers/latency.py index 908108cb..de4ab341 100644 --- a/optimum_benchmark/trackers/latency.py +++ b/optimum_benchmark/trackers/latency.py @@ -53,14 +53,17 @@ def __sub__(self, latency: "Latency") -> 
"Latency": return Latency.from_values(values=latencies, unit=self.unit) @staticmethod - def aggregate(latencies: List["Latency"]) -> "Latency": - if len(latencies) == 0 or all(latency is None for latency in latencies): - return None + def aggregate_across_processes(latencies: List["Latency"]) -> "Latency": + if len(latencies) == 0: + raise ValueError("No latency measurements to aggregate") elif any(latency is None for latency in latencies): raise ValueError("Some latency measurements are missing") - unit = latencies[0].unit + # we combine the lists of latencies and statistics are then computed on this list values = sum((lat.values for lat in latencies), []) + + unit = latencies[0].unit + return Latency.from_values(values=values, unit=unit) @staticmethod @@ -123,14 +126,15 @@ class Throughput: value: float @staticmethod - def aggregate(throughputs: List["Throughput"]) -> "Throughput": + def aggregate_across_processes(throughputs: List[Optional["Throughput"]]) -> Optional["Throughput"]: if len(throughputs) == 0: raise ValueError("No throughput measurements to aggregate") elif any(throughput is None for throughput in throughputs): raise ValueError("Some throughput measurements are missing") + # we compute throughputs on the whole input level so we just take the average + value = sum(throughput.value for throughput in throughputs) / len(throughputs) unit = throughputs[0].unit - value = sum(throughput.value for throughput in throughputs) return Throughput(value=value, unit=unit) diff --git a/optimum_benchmark/trackers/memory.py b/optimum_benchmark/trackers/memory.py index 5e9359b1..47edf71e 100644 --- a/optimum_benchmark/trackers/memory.py +++ b/optimum_benchmark/trackers/memory.py @@ -52,16 +52,14 @@ class Memory: max_allocated: Optional[float] = None @staticmethod - def aggregate(memories: List["Memory"]) -> "Memory": + def aggregate_across_processes(memories: List["Memory"]) -> "Memory": if len(memories) == 0: raise ValueError("No memory measurements to aggregate") elif any(memory is None for memory in memories): raise ValueError("Some memory measurements are missing") - unit = memories[0].unit - - # process specific measurements - max_ram = sum(memory.max_ram for memory in memories) + # ram, reserved, allocated, and process_vram measurements are process-specific so they are summed + max_ram = sum(memory.max_ram for memory in memories) if memories[0].max_ram is not None else None max_reserved = sum(memory.max_reserved for memory in memories) if memories[0].max_reserved is not None else None max_allocated = ( sum(memory.max_allocated for memory in memories) if memories[0].max_allocated is not None else None @@ -69,10 +67,13 @@ def aggregate(memories: List["Memory"]) -> "Memory": max_process_vram = ( sum(memory.max_process_vram for memory in memories) if memories[0].max_process_vram is not None else None ) - # machine level measurements + # global_vram is not process-specific so we take the average max_global_vram = ( - max(memory.max_global_vram for memory in memories) if memories[0].max_global_vram is not None else None + sum(memory.max_global_vram for memory in memories) / len(memories) + if memories[0].max_global_vram is not None + else None ) + unit = memories[0].unit return Memory( unit=unit, From 712d8517ab801e3674339834c94df34feb558564 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 27 Nov 2024 10:32:59 +0100 Subject: [PATCH 08/16] fix style --- examples/pytorch_llama.py | 2 +- optimum_benchmark/backends/pytorch/backend.py | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/examples/pytorch_llama.py b/examples/pytorch_llama.py index bcaaedcd..fe732bfa 100644 --- a/examples/pytorch_llama.py +++ b/examples/pytorch_llama.py @@ -33,7 +33,7 @@ "torch_dtype": "bfloat16", "quantization_scheme": "torchao", "quantization_config": {"quant_type": "int4_weight_only", "group_size": 128}, - } + }, } diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index cf96c5a6..651e6d12 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -11,12 +11,12 @@ AwqConfig, BitsAndBytesConfig, GPTQConfig, + TorchAoConfig, Trainer, TrainerCallback, TrainerState, TrainingArguments, ) -from transformers import TorchAoConfig from ...import_utils import is_deepspeed_available, is_torch_distributed_available, is_zentorch_available from ..base import Backend From 88b86faae12478eb22d730d2886822bdb0286e5b Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Wed, 27 Nov 2024 15:42:55 +0100 Subject: [PATCH 09/16] fix --- optimum_benchmark/backends/base.py | 12 +- optimum_benchmark/backends/diffusers_utils.py | 42 +--- optimum_benchmark/backends/timm_utils.py | 14 +- .../backends/transformers_utils.py | 48 +--- .../scenarios/inference/scenario.py | 26 ++- optimum_benchmark/task_utils.py | 215 +++++++++++++----- tests/configs/_st_bert_.yaml | 3 + ...yaml => cpu_inference_py_txi_st_bert.yaml} | 4 +- ...aml => cuda_inference_py_txi_st_bert.yaml} | 4 +- tests/test_api.py | 20 +- 10 files changed, 214 insertions(+), 174 deletions(-) create mode 100644 tests/configs/_st_bert_.yaml rename tests/configs/{cpu_inference_py_txi_bert.yaml => cpu_inference_py_txi_st_bert.yaml} (77%) rename tests/configs/{cuda_inference_py_txi_bert.yaml => cuda_inference_py_txi_st_bert.yaml} (77%) diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py index 1c039163..8488b457 100644 --- a/optimum_benchmark/backends/base.py +++ b/optimum_benchmark/backends/base.py @@ -13,14 +13,14 @@ from .config import BackendConfigT from .diffusers_utils import ( extract_diffusers_shapes_from_model, - get_diffusers_automodel_loader_for_task, + get_diffusers_auto_pipeline_class_for_task, get_diffusers_pretrained_config, ) -from .timm_utils import extract_timm_shapes_from_config, get_timm_automodel_loader, get_timm_pretrained_config +from .timm_utils import extract_timm_shapes_from_config, get_timm_model_creator, get_timm_pretrained_config from .transformers_utils import ( PretrainedProcessor, extract_transformers_shapes_from_artifacts, - get_transformers_automodel_loader_for_task, + get_transformers_auto_model_class_for_task, get_transformers_generation_config, get_transformers_pretrained_config, get_transformers_pretrained_processor, @@ -56,7 +56,7 @@ def __init__(self, config: BackendConfigT): self.logger.info("\t+ Benchmarking a Diffusers pipeline") self.pretrained_config = get_diffusers_pretrained_config(self.config.model, **self.config.model_kwargs) self.model_shapes = extract_diffusers_shapes_from_model(self.config.model, **self.config.model_kwargs) - self.automodel_loader = get_diffusers_automodel_loader_for_task(self.config.task) + self.automodel_loader = get_diffusers_auto_pipeline_class_for_task(self.config.task) self.pretrained_processor = None self.generation_config = None @@ -64,7 +64,7 @@ def __init__(self, config: BackendConfigT): self.logger.info("\t+ Benchmarking a Timm model") self.pretrained_config = get_timm_pretrained_config(self.config.model) self.model_shapes 
= extract_timm_shapes_from_config(self.pretrained_config) - self.automodel_loader = get_timm_automodel_loader() + self.automodel_loader = get_timm_model_creator() self.pretrained_processor = None self.generation_config = None @@ -78,7 +78,7 @@ def __init__(self, config: BackendConfigT): else: self.logger.info("\t+ Benchmarking a Transformers model") - self.automodel_loader = get_transformers_automodel_loader_for_task(self.config.task, self.config.model_type) + self.automodel_loader = get_transformers_auto_model_class_for_task(self.config.task, self.config.model_type) self.generation_config = get_transformers_generation_config(self.config.model, **self.config.model_kwargs) self.pretrained_config = get_transformers_pretrained_config(self.config.model, **self.config.model_kwargs) self.pretrained_processor = get_transformers_pretrained_processor( diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index 43f0757b..ef1b4a59 100644 --- a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -9,33 +9,16 @@ import diffusers from diffusers import DiffusionPipeline - if hasattr(diffusers, "pipelines") and hasattr(diffusers.pipelines, "auto_pipeline"): - from diffusers.pipelines.auto_pipeline import ( - AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, - AUTO_INPAINT_PIPELINES_MAPPING, - AUTO_TEXT2IMAGE_PIPELINES_MAPPING, - ) - - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = { - "inpainting": AUTO_INPAINT_PIPELINES_MAPPING.copy(), - "text-to-image": AUTO_TEXT2IMAGE_PIPELINES_MAPPING.copy(), - "image-to-image": AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.copy(), - } - - for task_name, model_mapping in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES.items(): - for model_type, model_class in model_mapping.items(): - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task_name][model_type] = model_class.__name__ - else: - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {} -else: - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES = {} +def get_diffusers_auto_pipeline_class_for_task(task: str): + from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES -TASKS_TO_MODEL_LOADERS = { - "inpainting": "AutoPipelineForInpainting", - "text-to-image": "AutoPipelineForText2Image", - "image-to-image": "AutoPipelineForImage2Image", -} + if not is_diffusers_available(): + raise ImportError("diffusers is not available. Please, pip install diffusers.") + + model_loader_name = TASKS_TO_AUTO_PIPELINE_CLASS_NAMES.get(task, None) + model_loader_class = getattr(diffusers, model_loader_name) + return model_loader_class def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]: @@ -85,12 +68,3 @@ def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]: shapes["width"] = -1 return shapes - - -def get_diffusers_automodel_loader_for_task(task: str): - if not is_diffusers_available(): - raise ImportError("diffusers is not available. Please, pip install diffusers.") - - model_loader_name = TASKS_TO_MODEL_LOADERS[task] - model_loader_class = getattr(diffusers, model_loader_name) - return model_loader_class diff --git a/optimum_benchmark/backends/timm_utils.py b/optimum_benchmark/backends/timm_utils.py index dbaf36fd..4cb3cd1c 100644 --- a/optimum_benchmark/backends/timm_utils.py +++ b/optimum_benchmark/backends/timm_utils.py @@ -10,6 +10,13 @@ from timm.models import get_pretrained_cfg, load_model_config_from_hf, parse_model_name +def get_timm_model_creator(): + if not is_timm_available(): + raise ImportError("timm is not available. 
Please, pip install timm.") + + return create_model + + def get_timm_pretrained_config(model_name: str) -> PretrainedConfig: if not is_timm_available(): raise ImportError("timm is not available. Please, pip install timm.") @@ -71,10 +78,3 @@ def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]: warnings.warn("Could not extract shapes [num_channels, height, width] from timm model config.") return shapes - - -def get_timm_automodel_loader(): - if not is_timm_available(): - raise ImportError("timm is not available. Please, pip install timm.") - - return create_model diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index efd2b8af..7226dd7c 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -18,59 +18,23 @@ SpecialTokensMixin, ) -TASKS_TO_AUTOMODEL_CLASS_NAMES = { - # text processing - "feature-extraction": "AutoModel", - "fill-mask": "AutoModelForMaskedLM", - "multiple-choice": "AutoModelForMultipleChoice", - "question-answering": "AutoModelForQuestionAnswering", - "token-classification": "AutoModelForTokenClassification", - "text-classification": "AutoModelForSequenceClassification", - # audio processing - "audio-xvector": "AutoModelForAudioXVector", - "text-to-audio": "AutoModelForTextToSpectrogram", - "audio-classification": "AutoModelForAudioClassification", - "audio-frame-classification": "AutoModelForAudioFrameClassification", - # image processing - "mask-generation": "AutoModel", - "image-to-image": "AutoModelForImageToImage", - "masked-im": "AutoModelForMaskedImageModeling", - "object-detection": "AutoModelForObjectDetection", - "depth-estimation": "AutoModelForDepthEstimation", - "image-segmentation": "AutoModelForImageSegmentation", - "image-classification": "AutoModelForImageClassification", - "semantic-segmentation": "AutoModelForSemanticSegmentation", - "zero-shot-object-detection": "AutoModelForZeroShotObjectDetection", - "zero-shot-image-classification": "AutoModelForZeroShotImageClassification", - # text generation - "image-to-text": "AutoModelForVision2Seq", - "text-generation": "AutoModelForCausalLM", - "text2text-generation": "AutoModelForSeq2SeqLM", - "image-text-to-text": "AutoModelForImageTextToText", - "visual-question-answering": "AutoModelForVisualQuestionAnswering", - "automatic-speech-recognition": ("AutoModelForSpeechSeq2Seq", "AutoModelForCTC"), -} - -SYNONYM_TASKS = { - "summarization": "text2text-generation", - "sentence-similarity": "feature-extraction", -} +def get_transformers_auto_model_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: + from ..task_utils import SYNONYM_TASKS, TASKS_TO_AUTO_MODEL_CLASS_NAMES -def get_transformers_automodel_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: if task in SYNONYM_TASKS: task = SYNONYM_TASKS[task] - if task not in TASKS_TO_AUTOMODEL_CLASS_NAMES: + if task not in TASKS_TO_AUTO_MODEL_CLASS_NAMES: raise ValueError(f"Task {task} not supported") - if isinstance(TASKS_TO_AUTOMODEL_CLASS_NAMES[task], str): - return getattr(transformers, TASKS_TO_AUTOMODEL_CLASS_NAMES[task]) + if isinstance(TASKS_TO_AUTO_MODEL_CLASS_NAMES[task], str): + return getattr(transformers, TASKS_TO_AUTO_MODEL_CLASS_NAMES[task]) else: if model_type is None: raise ValueError(f"Task {task} requires a model_type to be specified") - for automodel_class_name in TASKS_TO_AUTOMODEL_CLASS_NAMES[task]: + for automodel_class_name in 
TASKS_TO_AUTO_MODEL_CLASS_NAMES[task]: automodel_class = getattr(transformers, automodel_class_name) if model_type in automodel_class._model_mapping._model_mapping: return automodel_class diff --git a/optimum_benchmark/scenarios/inference/scenario.py b/optimum_benchmark/scenarios/inference/scenario.py index e05cb7b9..c7faffed 100644 --- a/optimum_benchmark/scenarios/inference/scenario.py +++ b/optimum_benchmark/scenarios/inference/scenario.py @@ -66,15 +66,17 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: self.logger.info("\t+ Updating Text Generation kwargs with default values") self.config.generate_kwargs = {**TEXT_GENERATION_DEFAULT_KWARGS, **self.config.generate_kwargs} self.logger.info("\t+ Initializing Text Generation report") - self.report = BenchmarkReport.from_list(targets=["load", "prefill", "decode", "per_token"]) + self.report = BenchmarkReport.from_list(targets=["load_model", "prefill", "decode", "per_token"]) elif self.backend.config.task in IMAGE_DIFFUSION_TASKS: self.logger.info("\t+ Updating Image Diffusion kwargs with default values") self.config.call_kwargs = {**IMAGE_DIFFUSION_DEFAULT_KWARGS, **self.config.call_kwargs} self.logger.info("\t+ Initializing Image Diffusion report") - self.report = BenchmarkReport.from_list(targets=["load", "call"]) + self.report = BenchmarkReport.from_list(targets=["load_model", "call"]) else: self.logger.info("\t+ Initializing Inference report") - self.report = BenchmarkReport.from_list(targets=["load", "forward"]) + self.report = BenchmarkReport.from_list(targets=["load_model", "forward"]) + + self.run_model_loading_tracking(backend) self.logger.info("\t+ Creating input generator") self.input_generator = InputGenerator( @@ -83,15 +85,11 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport: input_shapes=self.config.input_shapes, model_type=backend.config.model_type, ) - self.logger.info("\t+ Generating inputs") self.inputs = self.input_generator() - - self.logger.info("\t+ Preparing inputs for Inference") + self.logger.info("\t+ Preparing inputs for backend") self.inputs = backend.prepare_inputs(inputs=self.inputs) - self.run_model_loading_tracking(backend) - if self.config.latency or self.config.energy: # latency and energy are metrics that require some warmup if self.config.warmup_runs > 0: @@ -159,8 +157,14 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]): ) if self.config.latency: latency_tracker = LatencyTracker(backend=backend.config.name, device=backend.config.device) + if self.config.energy: + energy_tracker = EnergyTracker( + backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids + ) with ExitStack() as context_stack: + if self.config.energy: + context_stack.enter_context(energy_tracker.track()) if self.config.memory: context_stack.enter_context(memory_tracker.track()) if self.config.latency: @@ -169,9 +173,11 @@ def run_model_loading_tracking(self, backend: Backend[BackendConfigT]): backend.load() if self.config.latency: - self.report.load.latency = latency_tracker.get_latency() + self.report.load_model.latency = latency_tracker.get_latency() if self.config.memory: - self.report.load.memory = memory_tracker.get_max_memory() + self.report.load_model.memory = memory_tracker.get_max_memory() + if self.config.energy: + self.report.load_model.energy = energy_tracker.get_energy() ## Memory tracking def run_text_generation_memory_tracking(self, backend: Backend[BackendConfigT]): diff --git a/optimum_benchmark/task_utils.py 
b/optimum_benchmark/task_utils.py index 0a2a98c2..1821b47d 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -5,38 +5,94 @@ import huggingface_hub -from .backends.diffusers_utils import ( - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES as DIFFUSERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES, -) -from .backends.diffusers_utils import ( - get_diffusers_pretrained_config, -) +from .backends.diffusers_utils import get_diffusers_pretrained_config from .backends.timm_utils import get_timm_pretrained_config -from .backends.transformers_utils import ( - TASKS_TO_MODEL_LOADERS, - get_transformers_pretrained_config, -) -from .backends.transformers_utils import ( - TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES as TRANSFORMERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES, -) - -_SYNONYM_TASK_MAP = { - "masked-lm": "fill-mask", - "causal-lm": "text-generation", - "default": "feature-extraction", - "vision2seq-lm": "image-to-text", - "text-to-speech": "text-to-audio", - "seq2seq-lm": "text2text-generation", - "translation": "text2text-generation", - "summarization": "text2text-generation", - "mask-generation": "feature-extraction", - "audio-ctc": "automatic-speech-recognition", - "sentence-similarity": "feature-extraction", - "speech2seq-lm": "automatic-speech-recognition", - "sequence-classification": "text-classification", - "zero-shot-classification": "text-classification", +from .backends.transformers_utils import get_transformers_pretrained_config +from .import_utils import is_diffusers_available, is_torch_available + +TASKS_TO_AUTO_MODEL_CLASS_NAMES = { + # text processing + "feature-extraction": "AutoModel", + "fill-mask": "AutoModelForMaskedLM", + "multiple-choice": "AutoModelForMultipleChoice", + "question-answering": "AutoModelForQuestionAnswering", + "token-classification": "AutoModelForTokenClassification", + "text-classification": "AutoModelForSequenceClassification", + # audio processing + "audio-xvector": "AutoModelForAudioXVector", + "text-to-audio": "AutoModelForTextToSpectrogram", + "audio-classification": "AutoModelForAudioClassification", + "audio-frame-classification": "AutoModelForAudioFrameClassification", + # image processing + "mask-generation": "AutoModel", + "image-to-image": "AutoModelForImageToImage", + "masked-im": "AutoModelForMaskedImageModeling", + "object-detection": "AutoModelForObjectDetection", + "depth-estimation": "AutoModelForDepthEstimation", + "image-segmentation": "AutoModelForImageSegmentation", + "image-classification": "AutoModelForImageClassification", + "semantic-segmentation": "AutoModelForSemanticSegmentation", + "zero-shot-object-detection": "AutoModelForZeroShotObjectDetection", + "zero-shot-image-classification": "AutoModelForZeroShotImageClassification", + # text generation + "image-to-text": "AutoModelForVision2Seq", + "text-generation": "AutoModelForCausalLM", + "text2text-generation": "AutoModelForSeq2SeqLM", + "image-text-to-text": "AutoModelForImageTextToText", + "visual-question-answering": "AutoModelForVisualQuestionAnswering", + "automatic-speech-recognition": ("AutoModelForSpeechSeq2Seq", "AutoModelForCTC"), +} + +TASKS_TO_AUTO_PIPELINE_CLASS_NAMES = { + "inpainting": "AutoPipelineForInpainting", + "text-to-image": "AutoPipelineForText2Image", + "image-to-image": "AutoPipelineForImage2Image", } +TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES = {} + +if is_torch_available(): + import transformers + + for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): + TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES[task_name] = {} 
+ + if isinstance(auto_model_class_names, str): + auto_model_class_names = (auto_model_class_names,) + + for auto_model_class_name in auto_model_class_names: + auto_model_class = getattr(transformers, auto_model_class_name, None) + if auto_model_class is not None: + TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES[task_name].update( + auto_model_class._model_mapping._model_mapping + ) + + +TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = {} + +if is_diffusers_available(): + import diffusers + + if hasattr(diffusers, "pipelines") and hasattr(diffusers.pipelines, "auto_pipeline"): + from diffusers.pipelines.auto_pipeline import ( + AUTO_IMAGE2IMAGE_PIPELINES_MAPPING, + AUTO_INPAINT_PIPELINES_MAPPING, + AUTO_TEXT2IMAGE_PIPELINES_MAPPING, + ) + + TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = { + "inpainting": AUTO_INPAINT_PIPELINES_MAPPING.copy(), + "text-to-image": AUTO_TEXT2IMAGE_PIPELINES_MAPPING.copy(), + "image-to-image": AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.copy(), + } + + for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): + for pipeline_type, pipeline_class in pipeline_mapping.items(): + TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES[task_name][pipeline_type] = pipeline_class.__name__ + else: + TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = {} + + IMAGE_DIFFUSION_TASKS = [ "inpainting", "text-to-image", @@ -56,15 +112,34 @@ "feature-extraction", ] +SYNONYM_TASKS = { + "masked-lm": "fill-mask", + "causal-lm": "text-generation", + "default": "feature-extraction", + "vision2seq-lm": "image-to-text", + "text-to-speech": "text-to-audio", + "seq2seq-lm": "text2text-generation", + "translation": "text2text-generation", + "summarization": "text2text-generation", + "mask-generation": "feature-extraction", + "audio-ctc": "automatic-speech-recognition", + "sentence-similarity": "feature-extraction", + "speech2seq-lm": "automatic-speech-recognition", + "sequence-classification": "text-classification", + "zero-shot-classification": "text-classification", +} + def map_from_synonym(task: str) -> str: - if task in _SYNONYM_TASK_MAP: - task = _SYNONYM_TASK_MAP[task] + if task in SYNONYM_TASKS: + task = SYNONYM_TASKS[task] return task def infer_library_from_model_name_or_path( - model_name_or_path: str, revision: Optional[str] = None, token: Optional[str] = None + model_name_or_path: str, + token: Optional[str] = None, + revision: Optional[str] = None, ) -> str: inferred_library_name = None @@ -77,6 +152,18 @@ def infer_library_from_model_name_or_path( repo_files = huggingface_hub.list_repo_files(model_name_or_path, revision=revision, token=token) if "model_index.json" in repo_files: inferred_library_name = "diffusers" + elif "config.json" in repo_files: + config_dict = json.loads( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="config.json", revision=revision, token=token + ) + ) + if "pretrained_cfg" in config_dict or "architecture" in config_dict: + inferred_library_name = "timm" + elif "_diffusers_version" in config_dict: + inferred_library_name = "diffusers" + else: + inferred_library_name = "transformers" if inferred_library_name is None: raise RuntimeError(f"Could not infer library name from repo {model_name_or_path}.") @@ -89,6 +176,7 @@ def infer_library_from_model_name_or_path( inferred_library_name = "diffusers" elif "config.json" in local_files: config_dict = json.load(open(os.path.join(model_name_or_path, "config.json"), "r")) + if "pretrained_cfg" in config_dict or "architecture" in config_dict: inferred_library_name = 
"timm" elif "_diffusers_version" in config_dict: @@ -129,14 +217,36 @@ def infer_task_from_model_name_or_path( elif library_name == "sentence-transformers": inferred_task_name = "feature-extraction" + elif huggingface_hub.repo_exists(model_name_or_path, token=token): + model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) + + if model_info.pipeline_tag is not None: + inferred_task_name = map_from_synonym(model_info.pipeline_tag) + + elif inferred_task_name is None: + if model_info.transformers_info is not None and model_info.transformersInfo.pipeline_tag is not None: + inferred_task_name = map_from_synonym(model_info.transformersInfo.pipeline_tag) + else: + target_auto_model = model_info.transformers_info["auto_model"] + for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): + if isinstance(auto_model_class_names, str): + auto_model_class_names = (auto_model_class_names,) + + for auto_model_class_name in auto_model_class_names: + if target_auto_model == auto_model_class_name: + inferred_task_name = task_name + break + if inferred_task_name is not None: + break + elif os.path.isdir(model_name_or_path): if library_name == "diffusers": diffusers_config = get_diffusers_pretrained_config(model_name_or_path, revision=revision, token=token) - class_name = diffusers_config["_class_name"] + target_class_name = diffusers_config["_class_name"] - for task_name, model_mapping in DIFFUSERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES.items(): - for model_type, model_class_name in model_mapping.items(): - if class_name == model_class_name: + for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): + for _, pipeline_class_name in pipeline_mapping.items(): + if target_class_name == pipeline_class_name: inferred_task_name = task_name break if inferred_task_name is not None: @@ -147,7 +257,7 @@ def infer_task_from_model_name_or_path( auto_modeling_module = importlib.import_module("transformers.models.auto.modeling_auto") model_type = transformers_config.model_type - for task_name, model_loaders in TRANSFORMERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES.items(): + for task_name, model_loaders in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items(): if isinstance(model_loaders, str): model_loaders = (model_loaders,) for model_loader in model_loaders: @@ -159,27 +269,6 @@ def infer_task_from_model_name_or_path( if inferred_task_name is not None: break - elif huggingface_hub.repo_exists(model_name_or_path, token=token): - model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) - - if model_info.pipeline_tag is not None: - inferred_task_name = map_from_synonym(model_info.pipeline_tag) - - elif inferred_task_name is None: - if model_info.transformers_info is not None and model_info.transformersInfo.pipeline_tag is not None: - inferred_task_name = map_from_synonym(model_info.transformersInfo.pipeline_tag) - else: - auto_model_class_name = model_info.transformers_info["auto_model"] - for task_name, model_loaders in TASKS_TO_MODEL_LOADERS.items(): - if isinstance(model_loaders, str): - model_loaders = (model_loaders,) - for model_loader in model_loaders: - if auto_model_class_name == model_loader: - inferred_task_name = task_name - break - if inferred_task_name is not None: - break - if inferred_task_name is None: raise KeyError(f"Could not find the proper task name for {auto_model_class_name}.") @@ -207,12 +296,12 @@ def infer_model_type_from_model_name_or_path( elif library_name == "diffusers": 
config = get_diffusers_pretrained_config(model_name_or_path, revision=revision, token=token) - class_name = config["_class_name"] + target_class_name = config["_class_name"] - for task_name, model_mapping in DIFFUSERS_TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES.items(): - for model_type, model_class_name in model_mapping.items(): - if model_class_name == class_name: - inferred_model_type = model_type + for _, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): + for pipeline_type, pipeline_class_name in pipeline_mapping.items(): + if target_class_name == pipeline_class_name: + inferred_model_type = pipeline_type break if inferred_model_type is not None: break diff --git a/tests/configs/_st_bert_.yaml b/tests/configs/_st_bert_.yaml new file mode 100644 index 00000000..05ef4026 --- /dev/null +++ b/tests/configs/_st_bert_.yaml @@ -0,0 +1,3 @@ +backend: + model: sentence-transformers/all-MiniLM-L6-v2 + task: feature-extraction diff --git a/tests/configs/cpu_inference_py_txi_bert.yaml b/tests/configs/cpu_inference_py_txi_st_bert.yaml similarity index 77% rename from tests/configs/cpu_inference_py_txi_bert.yaml rename to tests/configs/cpu_inference_py_txi_st_bert.yaml index a575be99..2650e1bf 100644 --- a/tests/configs/cpu_inference_py_txi_bert.yaml +++ b/tests/configs/cpu_inference_py_txi_st_bert.yaml @@ -3,8 +3,8 @@ defaults: - _base_ # inherits from base config - _cpu_ # inherits from cpu config - _inference_ # inherits from inference config - - _bert_ # inherits from bert config + - _st_bert_ # inherits from bert config - _self_ # hydra 1.1 compatibility - override backend: py-txi -name: cpu_inference_py_txi_bert +name: cpu_inference_py_txi_st_bert diff --git a/tests/configs/cuda_inference_py_txi_bert.yaml b/tests/configs/cuda_inference_py_txi_st_bert.yaml similarity index 77% rename from tests/configs/cuda_inference_py_txi_bert.yaml rename to tests/configs/cuda_inference_py_txi_st_bert.yaml index 62405f30..8ae494e7 100644 --- a/tests/configs/cuda_inference_py_txi_bert.yaml +++ b/tests/configs/cuda_inference_py_txi_st_bert.yaml @@ -3,8 +3,8 @@ defaults: - _base_ # inherits from base config - _cuda_ # inherits from cuda config - _inference_ # inherits from inference config - - _bert_ # inherits from bert config + - _st_bert_ # inherits from bert config - _self_ # hydra 1.1 compatibility - override backend: py-txi -name: cuda_inference_py_txi_bert +name: cuda_inference_py_txi_st_bert diff --git a/tests/test_api.py b/tests/test_api.py index fd6e2dac..01851c34 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -22,8 +22,6 @@ from optimum_benchmark.generators.dataset_generator import DatasetGenerator from optimum_benchmark.generators.input_generator import InputGenerator from optimum_benchmark.import_utils import get_git_revision_hash -from optimum_benchmark.scenarios.inference.config import INPUT_SHAPES -from optimum_benchmark.scenarios.training.config import DATASET_SHAPES from optimum_benchmark.system_utils import is_nvidia_system, is_rocm_system from optimum_benchmark.trackers import LatencyTracker, MemoryTracker @@ -40,6 +38,18 @@ ("diffusers", "text-to-image", "CompVis/stable-diffusion-v1-4"), ] +INPUT_SHAPES = { + "batch_size": 2, # for all tasks + "sequence_length": 16, # for text processing tasks + "num_choices": 2, # for multiple-choice task +} + +DATASET_SHAPES = { + "dataset_size": 2, # for all tasks + "sequence_length": 16, # for text processing tasks + "num_choices": 2, # for multiple-choice task +} + @pytest.mark.parametrize("device", ["cpu", "cuda"]) 
@pytest.mark.parametrize("scenario", ["training", "inference"]) @@ -47,9 +57,6 @@ def test_api_launch(device, scenario, library, task, model): benchmark_name = f"{device}_{scenario}_{library}_{task}_{model}" - if task == "multiple-choice": - INPUT_SHAPES["num_choices"] = 2 - if device == "cuda": device_isolation = True if is_rocm_system(): @@ -173,9 +180,6 @@ def test_api_input_generator(library, task, model): else: raise ValueError(f"Unknown library {library}") - if task == "multiple-choice": - INPUT_SHAPES["num_choices"] = 2 - input_generator = InputGenerator( task=task, input_shapes=INPUT_SHAPES, From 00a4c21c20ae6c561abba473ef80d2413fe0448b Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 09:46:13 +0100 Subject: [PATCH 10/16] fix --- README.md | 1 + optimum_benchmark/backends/diffusers_utils.py | 14 +- .../backends/transformers_utils.py | 9 +- optimum_benchmark/task_utils.py | 128 +++++++++++++----- 4 files changed, 110 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 6f96561e..6358b341 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,7 @@ Optimum-Benchmark is continuously and intensively tested on a variety of devices [![CLI_CUDA_TENSORRT_LLM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_tensorrt_llm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_tensorrt_llm.yaml) [![CLI_CUDA_TORCH_ORT](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_torch_ort.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_torch_ort.yaml) [![CLI_CUDA_VLLM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_vllm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_cuda_vllm.yaml) +[![CLI_ENERGY_STAR](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_energy_star.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_energy_star.yaml) [![CLI_MISC](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_misc.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_misc.yaml) [![CLI_ROCM_PYTORCH](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cli_rocm_pytorch.yaml) diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index ef1b4a59..4572be01 100644 --- a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -4,6 +4,7 @@ from hydra.utils import get_class from ..import_utils import is_diffusers_available +from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES, map_from_synonym if is_diffusers_available(): import diffusers @@ -11,14 +12,17 @@ def get_diffusers_auto_pipeline_class_for_task(task: str): - from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES - if not is_diffusers_available(): raise ImportError("diffusers is not available. 
Please, pip install diffusers.") - model_loader_name = TASKS_TO_AUTO_PIPELINE_CLASS_NAMES.get(task, None) - model_loader_class = getattr(diffusers, model_loader_name) - return model_loader_class + task = map_from_synonym(task) + + if task not in TASKS_TO_AUTO_PIPELINE_CLASS_NAMES: + raise ValueError(f"Task {task} not supported for diffusers") + + model_loader_name = TASKS_TO_AUTO_PIPELINE_CLASS_NAMES[task] + + return getattr(diffusers, model_loader_name) def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]: diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index 7226dd7c..c0234ba9 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -18,15 +18,14 @@ SpecialTokensMixin, ) +from ..task_utils import TASKS_TO_AUTO_MODEL_CLASS_NAMES, map_from_synonym -def get_transformers_auto_model_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: - from ..task_utils import SYNONYM_TASKS, TASKS_TO_AUTO_MODEL_CLASS_NAMES - if task in SYNONYM_TASKS: - task = SYNONYM_TASKS[task] +def get_transformers_auto_model_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: + task = map_from_synonym(task) if task not in TASKS_TO_AUTO_MODEL_CLASS_NAMES: - raise ValueError(f"Task {task} not supported") + raise ValueError(f"Task {task} not supported for transformers") if isinstance(TASKS_TO_AUTO_MODEL_CLASS_NAMES[task], str): return getattr(transformers, TASKS_TO_AUTO_MODEL_CLASS_NAMES[task]) diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index 1821b47d..7e82df36 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -5,10 +5,7 @@ import huggingface_hub -from .backends.diffusers_utils import get_diffusers_pretrained_config -from .backends.timm_utils import get_timm_pretrained_config -from .backends.transformers_utils import get_transformers_pretrained_config -from .import_utils import is_diffusers_available, is_torch_available +from .import_utils import is_diffusers_available, is_torch_available, is_transformers_available TASKS_TO_AUTO_MODEL_CLASS_NAMES = { # text processing @@ -51,7 +48,7 @@ TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES = {} -if is_torch_available(): +if is_transformers_available() and is_torch_available(): import transformers for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): @@ -88,9 +85,8 @@ for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): for pipeline_type, pipeline_class in pipeline_mapping.items(): + # diffusers does not have a mappings with just class names TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES[task_name][pipeline_type] = pipeline_class.__name__ - else: - TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES = {} IMAGE_DIFFUSION_TASKS = [ @@ -133,13 +129,12 @@ def map_from_synonym(task: str) -> str: if task in SYNONYM_TASKS: task = SYNONYM_TASKS[task] + return task def infer_library_from_model_name_or_path( - model_name_or_path: str, - token: Optional[str] = None, - revision: Optional[str] = None, + model_name_or_path: str, token: Optional[str] = None, revision: Optional[str] = None ) -> str: inferred_library_name = None @@ -154,9 +149,12 @@ def infer_library_from_model_name_or_path( inferred_library_name = "diffusers" elif "config.json" in repo_files: config_dict = json.loads( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, 
filename="config.json", revision=revision, token=token - ) + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="config.json", revision=revision, token=token + ), + mode="r", + ).read() ) if "pretrained_cfg" in config_dict or "architecture" in config_dict: inferred_library_name = "timm" @@ -164,6 +162,8 @@ def infer_library_from_model_name_or_path( inferred_library_name = "diffusers" else: inferred_library_name = "transformers" + elif "onfig_sentence_transformers.json" in repo_files: + inferred_library_name = "sentence-transformers" if inferred_library_name is None: raise RuntimeError(f"Could not infer library name from repo {model_name_or_path}.") @@ -175,7 +175,12 @@ def infer_library_from_model_name_or_path( if "model_index.json" in local_files: inferred_library_name = "diffusers" elif "config.json" in local_files: - config_dict = json.load(open(os.path.join(model_name_or_path, "config.json"), "r")) + config_dict = json.load( + open( + os.path.join(model_name_or_path, "config.json"), + mode="r", + ) + ) if "pretrained_cfg" in config_dict or "architecture" in config_dict: inferred_library_name = "timm" @@ -183,6 +188,8 @@ def infer_library_from_model_name_or_path( inferred_library_name = "diffusers" else: inferred_library_name = "transformers" + elif "config_sentence_transformers.json" in local_files: + inferred_library_name = "sentence-transformers" if inferred_library_name is None: raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.") @@ -202,9 +209,9 @@ def infer_library_from_model_name_or_path( def infer_task_from_model_name_or_path( model_name_or_path: str, - library_name: Optional[str] = None, - revision: Optional[str] = None, token: Optional[str] = None, + revision: Optional[str] = None, + library_name: Optional[str] = None, ) -> str: if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) @@ -215,17 +222,17 @@ def infer_task_from_model_name_or_path( inferred_task_name = "image-classification" elif library_name == "sentence-transformers": - inferred_task_name = "feature-extraction" + inferred_task_name = "sentence-similarity" elif huggingface_hub.repo_exists(model_name_or_path, token=token): model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) if model_info.pipeline_tag is not None: - inferred_task_name = map_from_synonym(model_info.pipeline_tag) + inferred_task_name = model_info.pipeline_tag elif inferred_task_name is None: if model_info.transformers_info is not None and model_info.transformersInfo.pipeline_tag is not None: - inferred_task_name = map_from_synonym(model_info.transformersInfo.pipeline_tag) + inferred_task_name = model_info.transformersInfo.pipeline_tag else: target_auto_model = model_info.transformers_info["auto_model"] for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): @@ -241,7 +248,12 @@ def infer_task_from_model_name_or_path( elif os.path.isdir(model_name_or_path): if library_name == "diffusers": - diffusers_config = get_diffusers_pretrained_config(model_name_or_path, revision=revision, token=token) + diffusers_config = json.load( + open( + os.path.join(model_name_or_path, "model_index.json"), + mode="r", + ) + ) target_class_name = diffusers_config["_class_name"] for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): @@ -253,7 +265,12 @@ def infer_task_from_model_name_or_path( break elif library_name == 
"transformers": - transformers_config = get_transformers_pretrained_config(model_name_or_path, revision=revision, token=token) + transformers_config = json.load( + open( + os.path.join(model_name_or_path, "config.json"), + mode="r", + ) + ) auto_modeling_module = importlib.import_module("transformers.models.auto.modeling_auto") model_type = transformers_config.model_type @@ -272,15 +289,16 @@ def infer_task_from_model_name_or_path( if inferred_task_name is None: raise KeyError(f"Could not find the proper task name for {auto_model_class_name}.") + inferred_task_name = map_from_synonym(inferred_task_name) + return inferred_task_name def infer_model_type_from_model_name_or_path( model_name_or_path: str, - library_name: Optional[str] = None, - revision: Optional[str] = None, token: Optional[str] = None, - trust_remote_code: bool = False, + revision: Optional[str] = None, + library_name: Optional[str] = None, ) -> str: if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) @@ -291,12 +309,44 @@ def infer_model_type_from_model_name_or_path( inferred_model_type = "llama_cpp" elif library_name == "timm": - timm_config = get_timm_pretrained_config(model_name_or_path) - inferred_model_type = timm_config.architecture + if huggingface_hub.repo_exists(model_name_or_path, token=token): + timm_config = json.loads( + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="config.json", revision=revision, token=token + ), + mode="r", + ).read() + ) + else: + timm_config = json.load( + open( + os.path.join(model_name_or_path, "config.json"), + mode="r", + ) + ) + + inferred_model_type = timm_config["architecture"] elif library_name == "diffusers": - config = get_diffusers_pretrained_config(model_name_or_path, revision=revision, token=token) - target_class_name = config["_class_name"] + if huggingface_hub.repo_exists(model_name_or_path, token=token): + diffusers_config = json.loads( + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="model_index.json", revision=revision, token=token + ), + mode="r", + ).read() + ) + else: + diffusers_config = json.load( + open( + os.path.join(model_name_or_path, "model_index.json"), + mode="r", + ) + ) + + target_class_name = diffusers_config["_class_name"] for _, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): for pipeline_type, pipeline_class_name in pipeline_mapping.items(): @@ -307,10 +357,24 @@ def infer_model_type_from_model_name_or_path( break else: - transformers_config = get_transformers_pretrained_config( - model_name_or_path, revision=revision, token=token, trust_remote_code=trust_remote_code - ) - inferred_model_type = transformers_config.model_type + if huggingface_hub.repo_exists(model_name_or_path, token=token): + transformers_config = json.loads( + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, filename="config.json", revision=revision, token=token + ), + mode="r", + ).read() + ) + else: + transformers_config = json.load( + open( + os.path.join(model_name_or_path, "config.json"), + mode="r", + ) + ) + + inferred_model_type = transformers_config["model_type"] if inferred_model_type is None: raise KeyError(f"Could not find the proper model type for {model_name_or_path}.") From 802975e05ea147528108d0c5d6e826376c7a1ee8 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 09:50:23 +0100 Subject: [PATCH 11/16] fix --- optimum_benchmark/backends/config.py | 11 
+++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/optimum_benchmark/backends/config.py b/optimum_benchmark/backends/config.py index cc4f6a24..fc265d4d 100644 --- a/optimum_benchmark/backends/config.py +++ b/optimum_benchmark/backends/config.py @@ -54,26 +54,25 @@ def __post_init__(self): # TODO: add cache_dir, token, etc. to these methods if self.library is None: self.library = infer_library_from_model_name_or_path( - self.model, + model_name_or_path=self.model, token=self.model_kwargs.get("token", None), revision=self.model_kwargs.get("revision", None), ) if self.task is None: self.task = infer_task_from_model_name_or_path( - self.model, - self.library, + model_name_or_path=self.model, token=self.model_kwargs.get("token", None), revision=self.model_kwargs.get("revision", None), + library_name=self.library, ) if self.model_type is None: self.model_type = infer_model_type_from_model_name_or_path( - self.model, - self.library, + model_name_or_path=self.model, token=self.model_kwargs.get("token", None), revision=self.model_kwargs.get("revision", None), - trust_remote_code=self.model_kwargs.get("trust_remote_code", False), + library_name=self.library, ) if self.device is None: From f9cf245fe6995df2c5f23bc77b92e835e634f9fd Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 10:30:31 +0100 Subject: [PATCH 12/16] better task utils that support local repos --- optimum_benchmark/task_utils.py | 274 +++++++++++--------------------- 1 file changed, 97 insertions(+), 177 deletions(-) diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index 7e82df36..cc9dff5c 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -1,4 +1,3 @@ -import importlib import json import os from typing import Optional @@ -133,74 +132,83 @@ def map_from_synonym(task: str) -> str: return task +def is_hf_hub_repo(model_name_or_path: str, token: Optional[str] = None) -> bool: + try: + return huggingface_hub.repo_exists(model_name_or_path, token=token) + except Exception: + return False + + +def is_local_dir_repo(model_name_or_path: str) -> bool: + return os.path.isdir(model_name_or_path) + + +def get_repo_config( + model_name_or_path: str, config_name: str, token: Optional[str] = None, revision: Optional[str] = None +): + if is_hf_hub_repo(model_name_or_path, token=token): + config = json.loads( + open( + huggingface_hub.hf_hub_download( + repo_id=model_name_or_path, + filename=config_name, + revision=revision, + token=token, + ), + mode="r", + ) + ) + elif is_local_dir_repo(model_name_or_path): + config = json.load( + open( + os.path.join(model_name_or_path, config_name), + mode="r", + ) + ) + else: + raise KeyError(f"`{model_name_or_path}` is neither an hf hub repo nor a local directory.") + + return config + + +def get_repo_files(model_name_or_path: str, token: Optional[str] = None, revision: Optional[str] = None): + if is_hf_hub_repo(model_name_or_path, token=token): + repo_files = huggingface_hub.list_repo_files(model_name_or_path, revision=revision, token=token) + elif is_local_dir_repo(model_name_or_path): + repo_files = os.listdir(model_name_or_path) + else: + raise KeyError(f"`{model_name_or_path}` is neither an hf hub repo nor a local directory.") + + return repo_files + + def infer_library_from_model_name_or_path( - model_name_or_path: str, token: Optional[str] = None, revision: Optional[str] = None + model_name_or_path: str, + token: Optional[str] = None, + revision: Optional[str] = None, ) -> str: inferred_library_name 
= None - # if model_name_or_path is a repo - if huggingface_hub.repo_exists(model_name_or_path, token=token): - model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) - inferred_library_name = getattr(model_info, "library_name", None) - - if inferred_library_name is None: - repo_files = huggingface_hub.list_repo_files(model_name_or_path, revision=revision, token=token) - if "model_index.json" in repo_files: - inferred_library_name = "diffusers" - elif "config.json" in repo_files: - config_dict = json.loads( - open( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, filename="config.json", revision=revision, token=token - ), - mode="r", - ).read() - ) - if "pretrained_cfg" in config_dict or "architecture" in config_dict: - inferred_library_name = "timm" - elif "_diffusers_version" in config_dict: - inferred_library_name = "diffusers" - else: - inferred_library_name = "transformers" - elif "onfig_sentence_transformers.json" in repo_files: - inferred_library_name = "sentence-transformers" - - if inferred_library_name is None: - raise RuntimeError(f"Could not infer library name from repo {model_name_or_path}.") - - # if model_name_or_path is a directory - elif os.path.isdir(model_name_or_path): - local_files = os.listdir(model_name_or_path) - - if "model_index.json" in local_files: - inferred_library_name = "diffusers" - elif "config.json" in local_files: - config_dict = json.load( - open( - os.path.join(model_name_or_path, "config.json"), - mode="r", - ) - ) + repo_files = get_repo_files(model_name_or_path, token=token, revision=revision) - if "pretrained_cfg" in config_dict or "architecture" in config_dict: - inferred_library_name = "timm" - elif "_diffusers_version" in config_dict: - inferred_library_name = "diffusers" - else: - inferred_library_name = "transformers" - elif "config_sentence_transformers.json" in local_files: - inferred_library_name = "sentence-transformers" + if "model_index.json" in repo_files: + inferred_library_name = "diffusers" - if inferred_library_name is None: - raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.") + elif "config_sentence_transformers.json" in repo_files: + inferred_library_name = "sentence-transformers" - else: - raise KeyError( - f"Could not find the proper library name for {model_name_or_path}" - " because it's neither a repo nor a directory." 
- ) + elif "config.json" in repo_files: + config_dict = get_repo_config(model_name_or_path, token=token, revision=revision) - # for now, we still use transformers for sentence-transformers + if "pretrained_cfg" in config_dict: + inferred_library_name = "timm" + else: + inferred_library_name = "transformers" + + if inferred_library_name is None: + raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.") + + # for now, we use transformers for sentence-transformers models if inferred_library_name == "sentence-transformers": inferred_library_name = "transformers" @@ -213,85 +221,45 @@ def infer_task_from_model_name_or_path( revision: Optional[str] = None, library_name: Optional[str] = None, ) -> str: + inferred_task_name = None + if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) - inferred_task_name = None - if library_name == "timm": inferred_task_name = "image-classification" elif library_name == "sentence-transformers": inferred_task_name = "sentence-similarity" - elif huggingface_hub.repo_exists(model_name_or_path, token=token): - model_info = huggingface_hub.model_info(model_name_or_path, revision=revision, token=token) - - if model_info.pipeline_tag is not None: - inferred_task_name = model_info.pipeline_tag - - elif inferred_task_name is None: - if model_info.transformers_info is not None and model_info.transformersInfo.pipeline_tag is not None: - inferred_task_name = model_info.transformersInfo.pipeline_tag - else: - target_auto_model = model_info.transformers_info["auto_model"] - for task_name, auto_model_class_names in TASKS_TO_AUTO_MODEL_CLASS_NAMES.items(): - if isinstance(auto_model_class_names, str): - auto_model_class_names = (auto_model_class_names,) - - for auto_model_class_name in auto_model_class_names: - if target_auto_model == auto_model_class_name: - inferred_task_name = task_name - break - if inferred_task_name is not None: - break - - elif os.path.isdir(model_name_or_path): - if library_name == "diffusers": - diffusers_config = json.load( - open( - os.path.join(model_name_or_path, "model_index.json"), - mode="r", - ) - ) - target_class_name = diffusers_config["_class_name"] - - for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): - for _, pipeline_class_name in pipeline_mapping.items(): - if target_class_name == pipeline_class_name: - inferred_task_name = task_name - break - if inferred_task_name is not None: + elif library_name == "diffusers": + diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision) + target_class_name = diffusers_config["_class_name"] + + for task_name, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): + for _, pipeline_class_name in pipeline_mapping.items(): + if target_class_name == pipeline_class_name: + inferred_task_name = task_name break + if inferred_task_name is not None: + break - elif library_name == "transformers": - transformers_config = json.load( - open( - os.path.join(model_name_or_path, "config.json"), - mode="r", - ) - ) - auto_modeling_module = importlib.import_module("transformers.models.auto.modeling_auto") - model_type = transformers_config.model_type - - for task_name, model_loaders in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items(): - if isinstance(model_loaders, str): - model_loaders = (model_loaders,) - for model_loader in model_loaders: - model_loader_class = getattr(auto_modeling_module, 
model_loader) - model_mapping = model_loader_class._model_mapping._model_mapping - if model_type in model_mapping: - inferred_task_name = task_name - break - if inferred_task_name is not None: + elif library_name == "transformers": + transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) + target_class_name = transformers_config["architectures"][0] + + for task_name, model_mapping in TASKS_TO_MODEL_TYPES_TO_MODEL_CLASS_NAMES.items(): + for _, model_class_name in model_mapping.items(): + if target_class_name == model_class_name: + inferred_task_name = task_name break + if inferred_task_name is not None: + break if inferred_task_name is None: raise KeyError(f"Could not find the proper task name for {auto_model_class_name}.") - inferred_task_name = map_from_synonym(inferred_task_name) - - return inferred_task_name + return map_from_synonym(inferred_task_name) def infer_model_type_from_model_name_or_path( @@ -300,52 +268,20 @@ def infer_model_type_from_model_name_or_path( revision: Optional[str] = None, library_name: Optional[str] = None, ) -> str: + inferred_model_type = None + if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) - inferred_model_type = None - if library_name == "llama_cpp": inferred_model_type = "llama_cpp" elif library_name == "timm": - if huggingface_hub.repo_exists(model_name_or_path, token=token): - timm_config = json.loads( - open( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, filename="config.json", revision=revision, token=token - ), - mode="r", - ).read() - ) - else: - timm_config = json.load( - open( - os.path.join(model_name_or_path, "config.json"), - mode="r", - ) - ) - + timm_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) inferred_model_type = timm_config["architecture"] elif library_name == "diffusers": - if huggingface_hub.repo_exists(model_name_or_path, token=token): - diffusers_config = json.loads( - open( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, filename="model_index.json", revision=revision, token=token - ), - mode="r", - ).read() - ) - else: - diffusers_config = json.load( - open( - os.path.join(model_name_or_path, "model_index.json"), - mode="r", - ) - ) - + diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision) target_class_name = diffusers_config["_class_name"] for _, pipeline_mapping in TASKS_TO_PIPELINE_TYPES_TO_PIPELINE_CLASS_NAMES.items(): @@ -357,23 +293,7 @@ def infer_model_type_from_model_name_or_path( break else: - if huggingface_hub.repo_exists(model_name_or_path, token=token): - transformers_config = json.loads( - open( - huggingface_hub.hf_hub_download( - repo_id=model_name_or_path, filename="config.json", revision=revision, token=token - ), - mode="r", - ).read() - ) - else: - transformers_config = json.load( - open( - os.path.join(model_name_or_path, "config.json"), - mode="r", - ) - ) - + transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) inferred_model_type = transformers_config["model_type"] if inferred_model_type is None: From 27b5d9a0250885a9dd01ad5bfecde8a1e24c6c5e Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 10:48:46 +0100 Subject: [PATCH 13/16] fix --- optimum_benchmark/task_utils.py | 36 +++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 13 deletions(-) diff --git 
a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index cc9dff5c..31d0cc77 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -124,14 +124,25 @@ "zero-shot-classification": "text-classification", } +SYNONYM_LIBRARIES = { + "sentence-transformers": "transformers", +} + -def map_from_synonym(task: str) -> str: +def map_from_synonym_task(task: str) -> str: if task in SYNONYM_TASKS: task = SYNONYM_TASKS[task] return task +def map_from_synonym_library(library: str) -> str: + if library in SYNONYM_LIBRARIES: + library = SYNONYM_LIBRARIES[library] + + return library + + def is_hf_hub_repo(model_name_or_path: str, token: Optional[str] = None) -> bool: try: return huggingface_hub.repo_exists(model_name_or_path, token=token) @@ -198,21 +209,20 @@ def infer_library_from_model_name_or_path( inferred_library_name = "sentence-transformers" elif "config.json" in repo_files: - config_dict = get_repo_config(model_name_or_path, token=token, revision=revision) + config_dict = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) if "pretrained_cfg" in config_dict: inferred_library_name = "timm" else: inferred_library_name = "transformers" + elif any(file.endswith(".gguf") or file.endswith(".GGUF") for file in repo_files): + inferred_library_name = "llama_cpp" + if inferred_library_name is None: raise KeyError(f"Could not find the proper library name for directory {model_name_or_path}.") - # for now, we use transformers for sentence-transformers models - if inferred_library_name == "sentence-transformers": - inferred_library_name = "transformers" - - return inferred_library_name + return map_from_synonym_library(inferred_library_name) def infer_task_from_model_name_or_path( @@ -226,11 +236,11 @@ def infer_task_from_model_name_or_path( if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) - if library_name == "timm": - inferred_task_name = "image-classification" + if library_name == "llama_cpp": + inferred_task_name = "text-generation" - elif library_name == "sentence-transformers": - inferred_task_name = "sentence-similarity" + elif library_name == "timm": + inferred_task_name = "image-classification" elif library_name == "diffusers": diffusers_config = get_repo_config(model_name_or_path, "model_index.json", token=token, revision=revision) @@ -259,7 +269,7 @@ def infer_task_from_model_name_or_path( if inferred_task_name is None: raise KeyError(f"Could not find the proper task name for {auto_model_class_name}.") - return map_from_synonym(inferred_task_name) + return map_from_synonym_task(inferred_task_name) def infer_model_type_from_model_name_or_path( @@ -292,7 +302,7 @@ def infer_model_type_from_model_name_or_path( if inferred_model_type is not None: break - else: + elif library_name == "transformers": transformers_config = get_repo_config(model_name_or_path, "config.json", token=token, revision=revision) inferred_model_type = transformers_config["model_type"] From f9b6e6f920fee2d668b7e2c5289b4332ac2d373d Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 10:49:41 +0100 Subject: [PATCH 14/16] fix --- optimum_benchmark/backends/diffusers_utils.py | 4 ++-- optimum_benchmark/backends/transformers_utils.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index 4572be01..126d724d 100644 --- 
a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -4,7 +4,7 @@ from hydra.utils import get_class from ..import_utils import is_diffusers_available -from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES, map_from_synonym +from ..task_utils import TASKS_TO_AUTO_PIPELINE_CLASS_NAMES, map_from_synonym_task if is_diffusers_available(): import diffusers @@ -15,7 +15,7 @@ def get_diffusers_auto_pipeline_class_for_task(task: str): if not is_diffusers_available(): raise ImportError("diffusers is not available. Please, pip install diffusers.") - task = map_from_synonym(task) + task = map_from_synonym_task(task) if task not in TASKS_TO_AUTO_PIPELINE_CLASS_NAMES: raise ValueError(f"Task {task} not supported for diffusers") diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index c0234ba9..58feb7af 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -18,11 +18,11 @@ SpecialTokensMixin, ) -from ..task_utils import TASKS_TO_AUTO_MODEL_CLASS_NAMES, map_from_synonym +from ..task_utils import TASKS_TO_AUTO_MODEL_CLASS_NAMES, map_from_synonym_task def get_transformers_auto_model_class_for_task(task: str, model_type: Optional[str] = None) -> Type["AutoModel"]: - task = map_from_synonym(task) + task = map_from_synonym_task(task) if task not in TASKS_TO_AUTO_MODEL_CLASS_NAMES: raise ValueError(f"Task {task} not supported for transformers") From f85fea8e5d282dc63dcc30d5881a6bd296ea5f03 Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 10:53:26 +0100 Subject: [PATCH 15/16] fix --- optimum_benchmark/task_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index 31d0cc77..c5b43739 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -158,7 +158,7 @@ def get_repo_config( model_name_or_path: str, config_name: str, token: Optional[str] = None, revision: Optional[str] = None ): if is_hf_hub_repo(model_name_or_path, token=token): - config = json.loads( + config = json.load( open( huggingface_hub.hf_hub_download( repo_id=model_name_or_path, From 7343ce83e8ae5eabbae9ef800bec6a2df2a9ebff Mon Sep 17 00:00:00 2001 From: IlyasMoutawwakil Date: Thu, 28 Nov 2024 11:01:51 +0100 Subject: [PATCH 16/16] style --- optimum_benchmark/backends/diffusers_utils.py | 4 ++-- optimum_benchmark/backends/peft_utils.py | 3 ++- optimum_benchmark/backends/timm_utils.py | 8 ++------ optimum_benchmark/backends/transformers_utils.py | 6 +++--- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index 126d724d..345d30f0 100644 --- a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -12,11 +12,11 @@ def get_diffusers_auto_pipeline_class_for_task(task: str): + task = map_from_synonym_task(task) + if not is_diffusers_available(): raise ImportError("diffusers is not available. 
Please, pip install diffusers.") - task = map_from_synonym_task(task) - if task not in TASKS_TO_AUTO_PIPELINE_CLASS_NAMES: raise ValueError(f"Task {task} not supported for diffusers") diff --git a/optimum_benchmark/backends/peft_utils.py b/optimum_benchmark/backends/peft_utils.py index 92e71039..95b54b1e 100644 --- a/optimum_benchmark/backends/peft_utils.py +++ b/optimum_benchmark/backends/peft_utils.py @@ -8,9 +8,10 @@ from peft import PEFT_TYPE_TO_CONFIG_MAPPING, get_peft_model # type: ignore -def apply_peft(model: PreTrainedModel, peft_type: str, peft_config: Dict[str, Any]) -> PreTrainedModel: +def apply_peft(model: "PreTrainedModel", peft_type: str, peft_config: Dict[str, Any]) -> "PreTrainedModel": if not is_peft_available(): raise ImportError("peft is not available. Please, pip install peft.") peft_config = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type](**peft_config) + return get_peft_model(model=model, peft_config=peft_config) diff --git a/optimum_benchmark/backends/timm_utils.py b/optimum_benchmark/backends/timm_utils.py index 4cb3cd1c..7dc26e12 100644 --- a/optimum_benchmark/backends/timm_utils.py +++ b/optimum_benchmark/backends/timm_utils.py @@ -1,4 +1,3 @@ -import warnings from typing import Any, Dict from transformers import PretrainedConfig @@ -17,7 +16,7 @@ def get_timm_model_creator(): return create_model -def get_timm_pretrained_config(model_name: str) -> PretrainedConfig: +def get_timm_pretrained_config(model_name: str) -> "PretrainedConfig": if not is_timm_available(): raise ImportError("timm is not available. Please, pip install timm.") @@ -31,7 +30,7 @@ def get_timm_pretrained_config(model_name: str) -> PretrainedConfig: return get_pretrained_cfg(model_name) -def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]: +def extract_timm_shapes_from_config(config: "PretrainedConfig") -> Dict[str, Any]: if not is_timm_available(): raise ImportError("timm is not available. 
Please, pip install timm.") @@ -74,7 +73,4 @@ def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]: shapes["height"] = input_size[1] shapes["width"] = input_size[2] - if "num_classes" not in artifacts_dict: - warnings.warn("Could not extract shapes [num_channels, height, width] from timm model config.") - return shapes diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index 58feb7af..a623d43c 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -3,6 +3,7 @@ import torch import transformers +from torch import Tensor from transformers import ( AutoConfig, AutoFeatureExtractor, @@ -84,7 +85,7 @@ def get_flat_dict(d: Dict[str, Any]) -> Dict[str, Any]: return flat_dict -def get_flat_artifact_dict(artifact: Union[PretrainedConfig, PretrainedProcessor]) -> Dict[str, Any]: +def get_flat_artifact_dict(artifact: Union["PretrainedConfig", "PretrainedProcessor"]) -> Dict[str, Any]: artifact_dict = {} if isinstance(artifact, ProcessorMixin): @@ -175,7 +176,6 @@ def extract_transformers_shapes_from_artifacts( shapes["num_queries"] = flat_artifacts_dict["num_queries"] # image-text input - if "patch_size" in flat_artifacts_dict: shapes["patch_size"] = flat_artifacts_dict["patch_size"] if "in_chans" in flat_artifacts_dict: @@ -212,7 +212,7 @@ def extract_transformers_shapes_from_artifacts( } -def fast_random_tensor(tensor: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: +def fast_random_tensor(tensor: "Tensor", *args: Any, **kwargs: Any) -> "Tensor": return torch.nn.init.uniform_(tensor)