style

huggingface · Feb 19, 2024 · 81d0de9 · 81d0de9
1 parent fc75c83
commit 81d0de9
Show file tree

Hide file tree

Showing 42 changed files with 188 additions and 708 deletions.
diff --git a/Makefile b/Makefile
@@ -16,10 +16,10 @@ build_docker_cpu:
 	docker build -f docker/cpu.dockerfile  --build-arg USER_ID=$(shell id -u) --build-arg GROUP_ID=$(shell id -g) -t opt-bench-cpu:latest .
 
 build_docker_cuda:
-	docker build -f docker/cuda.dockerfile  --build-arg USER_ID=$(shell id -u) --build-arg GROUP_ID=$(shell id -g) --build-arg TORCH_CUDA=cu118 --build-arg CUDA_VERSION=11.8.0 -t opt-bench-cuda:11.8.0 . 
+	docker build -f docker/cuda.dockerfile  --build-arg USER_ID=$(shell id -u) --build-arg GROUP_ID=$(shell id -g) -t opt-bench-cuda:latest . 
 
 build_docker_rocm:
-	docker build -f docker/rocm.dockerfile  --build-arg USER_ID=$(shell id -u) --build-arg GROUP_ID=$(shell id -g) --build-arg TORCH_ROCM=rocm5.6 --build-arg ROCM_VERSION=5.6.1 -t opt-bench-rocm:5.6.1 . 
+	docker build -f docker/rocm.dockerfile  --build-arg USER_ID=$(shell id -u) --build-arg GROUP_ID=$(shell id -g) -t opt-bench-rocm:latest . 
 
 test_cli_cpu_neural_compressor:
 	docker run \
@@ -63,7 +63,7 @@ test_cli_rocm_pytorch:
 	--entrypoint /bin/bash \
 	--volume $(PWD):/workspace \
 	--workdir /workspace \
-	opt-bench-rocm:5.6.1 -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"
+	opt-bench-rocm:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"
 
 test_cli_cuda_pytorch:
 	docker run \
@@ -72,7 +72,7 @@ test_cli_cuda_pytorch:
 	--entrypoint /bin/bash \
 	--volume $(PWD):/workspace \
 	--workdir /workspace \
-	opt-bench-cuda:11.8.0 -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"
+	opt-bench-cuda:latest -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x"
 
 test_api_cpu:
 	docker run \
@@ -89,7 +89,7 @@ test_api_cuda:
 	--entrypoint /bin/bash \
 	--volume $(PWD):/workspace \
 	--workdir /workspace \
-	opt-bench-cuda:11.8.0 -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"
+	opt-bench-cuda:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"
 
 test_api_rocm:
 	docker run \
@@ -101,7 +101,7 @@ test_api_rocm:
 	--entrypoint /bin/bash \
 	--volume $(PWD):/workspace \
 	--workdir /workspace \
-	opt-bench-rocm:5.6.1 -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"
+	opt-bench-rocm:latest -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x"
 
 test_api_misc:
 	docker run \

diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py
@@ -61,10 +61,7 @@ def __init__(self, config: BackendConfigT):
             self.model_type = self.pretrained_config.model_type
 
         self.automodel_class = get_automodel_class_for_task(
-            model_type=self.model_type,
-            library=self.config.library,
-            task=self.config.task,
-            framework="pt",
+            model_type=self.model_type, library=self.config.library, task=self.config.task, framework="pt"
         )
 
     def seed(self) -> None:

diff --git a/optimum_benchmark/backends/config.py b/optimum_benchmark/backends/config.py
@@ -11,12 +11,7 @@
 
 LOGGER = getLogger("backend")
 
-HUB_KWARGS = {
-    "revision": "main",
-    "force_download": False,
-    "local_files_only": False,
-    "trust_remote_code": False,
-}
+HUB_KWARGS = {"revision": "main", "force_download": False, "local_files_only": False, "trust_remote_code": False}
 
 
 @dataclass

diff --git a/optimum_benchmark/backends/neural_compressor/backend.py b/optimum_benchmark/backends/neural_compressor/backend.py
@@ -111,9 +111,7 @@ def quantize_automodel(self) -> None:
         quantized_model_path = f"{self.tmpdir.name}/quantized"
         LOGGER.info("\t+ Processing quantization config")
         ptq_quantization_config = self.config.ptq_quantization_config.copy()
-        ptq_quantization_config["accuracy_criterion"] = AccuracyCriterion(
-            **ptq_quantization_config["accuracy_criterion"]
-        )
+        ptq_quantization_config["accuracy_criterion"] = AccuracyCriterion(**ptq_quantization_config["accuracy_criterion"])
         ptq_quantization_config["tuning_criterion"] = TuningCriterion(**ptq_quantization_config["tuning_criterion"])
         ptq_quantization_config = PostTrainingQuantConfig(**ptq_quantization_config)
         LOGGER.info("\t+ Creating quantizer")
@@ -128,16 +126,8 @@ def quantize_automodel(self) -> None:
 
         if self.config.calibration:
             LOGGER.info("\t+ Generating calibration dataset")
-            dataset_shapes = {
-                "dataset_size": 1,
-                "sequence_length": 1,
-                **self.model_shapes,
-            }
-            calibration_dataset = DatasetGenerator(
-                task=self.config.task,
-                dataset_shapes=dataset_shapes,
-                model_shapes=self.model_shapes,
-            )()
+            dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
+            calibration_dataset = DatasetGenerator(task=self.config.task, dataset_shapes=dataset_shapes, model_shapes=self.model_shapes)()
             columns_to_be_removed = list(set(calibration_dataset.column_names) - set(quantizer._signature_columns))
             calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
         else:

diff --git a/optimum_benchmark/backends/neural_compressor/config.py b/optimum_benchmark/backends/neural_compressor/config.py
@@ -7,20 +7,10 @@
 from ...import_utils import neural_compressor_version
 
 # https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L490
-ACCURACY_CRITERION_CONFIG = {
-    "higher_is_better": True,
-    "criterion": "relative",
-    "tolerable_loss": 0.01,
-}
+ACCURACY_CRITERION_CONFIG = {"higher_is_better": True, "criterion": "relative", "tolerable_loss": 0.01}
 
 # https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L593
-TUNING_CRITERION_CONFIG = {
-    "strategy": "basic",
-    "strategy_kwargs": None,
-    "timeout": 0,
-    "max_trials": 100,
-    "objective": "performance",
-}
+TUNING_CRITERION_CONFIG = {"strategy": "basic", "strategy_kwargs": None, "timeout": 0, "max_trials": 100, "objective": "performance"}
 
 # https://github.com/intel/neural-compressor/blob/master/neural_compressor/config.py#L1242
 PTQ_QUANTIZATION_CONFIG = {
@@ -69,8 +59,6 @@ def __post_init__(self):
             raise ValueError(f"INCBackend only supports CPU devices, got {self.device}")
 
         if self.ptq_quantization:
-            self.ptq_quantization_config = OmegaConf.to_object(
-                OmegaConf.merge(PTQ_QUANTIZATION_CONFIG, self.ptq_quantization_config)
-            )
+            self.ptq_quantization_config = OmegaConf.to_object(OmegaConf.merge(PTQ_QUANTIZATION_CONFIG, self.ptq_quantization_config))
             if self.ptq_quantization_config["approach"] == "static" and not self.calibration:
                 raise ValueError("Calibration must be enabled when using static quantization.")
diff --git a/optimum_benchmark/backends/neural_compressor/utils.py b/optimum_benchmark/backends/neural_compressor/utils.py
@@ -1,5 +1,3 @@
 from optimum.intel.neural_compressor.utils import _HEAD_TO_AUTOMODELS
 
-TASKS_TO_INCMODELS = {
-    task: f"optimum.intel.neural_compressor.{incmodel_name}" for task, incmodel_name in _HEAD_TO_AUTOMODELS.items()
-}
+TASKS_TO_INCMODELS = {task: f"optimum.intel.neural_compressor.{incmodel_name}" for task, incmodel_name in _HEAD_TO_AUTOMODELS.items()}
diff --git a/optimum_benchmark/backends/onnxruntime/backend.py b/optimum_benchmark/backends/onnxruntime/backend.py
@@ -27,14 +27,7 @@
     QuantizationConfig,
     CalibrationConfig,
 )
-from optimum.onnxruntime import (
-    ONNX_DECODER_WITH_PAST_NAME,
-    ONNX_DECODER_NAME,
-    ORTTrainingArguments,
-    ORTOptimizer,
-    ORTQuantizer,
-    ORTTrainer,
-)
+from optimum.onnxruntime import ONNX_DECODER_WITH_PAST_NAME, ONNX_DECODER_NAME, ORTTrainingArguments, ORTOptimizer, ORTQuantizer, ORTTrainer
 
 # disable transformers logging
 set_verbosity_error()
@@ -112,11 +105,7 @@ def create_no_weights_model(self) -> None:
         state_dict = torch.nn.Linear(1, 1).state_dict()
 
         LOGGER.info("\t+ Saving no weights model state dict")
-        save_file(
-            filename=os.path.join(self.no_weights_model, "model.safetensors"),
-            metadata={"format": "pt"},
-            tensors=state_dict,
-        )
+        save_file(filename=os.path.join(self.no_weights_model, "model.safetensors"), metadata={"format": "pt"}, tensors=state_dict)
 
     def load_ortmodel_with_no_weights(self) -> None:
         self.create_no_weights_model()
@@ -198,10 +187,7 @@ def optimize_onnx_files(self) -> None:
                 **self.config.auto_optimization_config,
             )
         elif self.config.optimization:
-            optimization_config = OptimizationConfig(
-                optimize_for_gpu=(self.config.device == "cuda"),
-                **self.config.optimization_config,
-            )
+            optimization_config = OptimizationConfig(optimize_for_gpu=(self.config.device == "cuda"), **self.config.optimization_config)
         LOGGER.info("\t+ Creating optimizer")
         optimizer = ORTOptimizer.from_pretrained(self.config.model, file_names=self.onnx_files_names)
         LOGGER.info("\t+ Optimizing ORTModel")
@@ -243,27 +229,16 @@ def quantize_onnx_files(self) -> None:
 
         if self.is_calibrated:
             LOGGER.info("\t+ Generating calibration dataset")
-            dataset_shapes = {
-                "dataset_size": 1,
-                "sequence_length": 1,
-                **self.model_shapes,
-            }
-            calibration_dataset = DatasetGenerator(
-                task=self.config.task,
-                dataset_shapes=dataset_shapes,
-                model_shapes=self.model_shapes,
-            )()
+            dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
+            calibration_dataset = DatasetGenerator(task=self.config.task, dataset_shapes=dataset_shapes, model_shapes=self.model_shapes)()
             columns_to_be_removed = list(set(calibration_dataset.column_names) - set(self.inputs_names))
             calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
 
             LOGGER.info("\t+ Processing calibration config")
             if self.config.auto_calibration is not None:
                 LOGGER.info("\t+ Processing calibration config")
                 auto_calibration_method = getattr(AutoCalibrationConfig, self.config.auto_calibration)
-                calibration_config = auto_calibration_method(
-                    calibration_dataset,
-                    **self.config.auto_calibration_config,
-                )
+                calibration_config = auto_calibration_method(calibration_dataset, **self.config.auto_calibration_config)
             elif self.config.calibration:
                 LOGGER.info("\t+ Processing calibration config")
                 calibration_config = format_calibration_config(self.config.calibration_config)

diff --git a/optimum_benchmark/backends/onnxruntime/config.py b/optimum_benchmark/backends/onnxruntime/config.py
@@ -18,14 +18,11 @@
 }
 
 AUTO_QUANTIZATION_CONFIG = {
-    "is_static": False,
+    "is_static": False
     # is_static is mandatory
 }
 
-TRT_PROVIDER_OPTIONS = {
-    "trt_engine_cache_enable": True,
-    "trt_engine_cache_path": "/tmp/trt_cache",
-}
+TRT_PROVIDER_OPTIONS = {"trt_engine_cache_enable": True, "trt_engine_cache_path": "/tmp/trt_cache"}
 
 IO_BINDING_LIBRARIES = ["transformers", "timm"]
 IO_BINDING_PROVIDERS = ["CPUExecutionProvider", "CUDAExecutionProvider"]
@@ -103,36 +100,26 @@ def __post_init__(self):
             os.makedirs(self.provider_options["trt_engine_cache_path"], exist_ok=True)
 
         if self.quantization:
-            self.quantization_config = {
-                **QUANTIZATION_CONFIG,
-                **self.quantization_config,
-            }
+            self.quantization_config = {**QUANTIZATION_CONFIG, **self.quantization_config}
             # raise ValueError if the quantization is static but calibration is not enabled
             if self.quantization_config["is_static"] and self.auto_calibration is None and not self.calibration:
                 raise ValueError(
-                    "Quantization is static but calibration is not enabled. "
-                    "Please enable calibration or disable static quantization."
+                    "Quantization is static but calibration is not enabled. Please enable calibration or disable static quantization."
                 )
 
         if self.auto_quantization is not None:
-            self.auto_quantization_config = {
-                **AUTO_QUANTIZATION_CONFIG,
-                **self.auto_quantization_config,
-            }
+            self.auto_quantization_config = {**AUTO_QUANTIZATION_CONFIG, **self.auto_quantization_config}
             if self.auto_quantization_config["is_static"] and self.auto_calibration is None and not self.calibration:
                 raise ValueError(
-                    "Quantization is static but calibration is not enabled. "
-                    "Please enable calibration or disable static quantization."
+                    "Quantization is static but calibration is not enabled. Please enable calibration or disable static quantization."
                 )
 
         if self.calibration:
             self.calibration_config = {**CALIBRATION_CONFIG, **self.calibration_config}
 
         if self.peft_strategy is not None:
             if self.peft_strategy not in PEFT_CONFIGS:
-                raise ValueError(
-                    f"`peft_strategy` must be one of {list(PEFT_CONFIGS.keys())}. Got {self.peft_strategy} instead."
-                )
+                raise ValueError(f"`peft_strategy` must be one of {list(PEFT_CONFIGS.keys())}. Got {self.peft_strategy} instead.")
             PEFT_CONFIG = PEFT_CONFIGS[self.peft_strategy]
             self.peft_config = {**PEFT_CONFIG, **self.peft_config}
 

diff --git a/optimum_benchmark/backends/onnxruntime/utils.py b/optimum_benchmark/backends/onnxruntime/utils.py
@@ -1,22 +1,15 @@
 from typing import Any, Dict
 
 from optimum.pipelines import ORT_SUPPORTED_TASKS
-from onnxruntime.quantization import (
-    CalibrationMethod,
-    QuantizationMode,
-    QuantFormat,
-    QuantType,
-)
+from onnxruntime.quantization import CalibrationMethod, QuantizationMode, QuantFormat, QuantType
 
 
 TASKS_TO_ORTSD = {
     "stable-diffusion": "optimum.onnxruntime.ORTStableDiffusionPipeline",
     "stable-diffusion-xl": "optimum.onnxruntime.ORTStableDiffusionXLPipeline",
 }
 
-TASKS_TO_ORTMODELS = {
-    task: f"optimum.onnxruntime.{task_dict['class'][0].__name__}" for task, task_dict in ORT_SUPPORTED_TASKS.items()
-}
+TASKS_TO_ORTMODELS = {task: f"optimum.onnxruntime.{task_dict['class'][0].__name__}" for task, task_dict in ORT_SUPPORTED_TASKS.items()}
 
 
 def format_calibration_config(calibration_config: Dict[str, Any]) -> None:

diff --git a/optimum_benchmark/backends/openvino/backend.py b/optimum_benchmark/backends/openvino/backend.py
@@ -80,11 +80,7 @@ def create_no_weights_model(self) -> None:
         state_dict = torch.nn.Linear(1, 1).state_dict()
 
         LOGGER.info("\t+ Saving no weights model state dict")
-        save_file(
-            filename=os.path.join(self.no_weights_model, "model.safetensors"),
-            metadata={"format": "pt"},
-            tensors=state_dict,
-        )
+        save_file(filename=os.path.join(self.no_weights_model, "model.safetensors"), metadata={"format": "pt"}, tensors=state_dict)
 
     def load_automodel_with_no_weights(self) -> None:
         self.create_no_weights_model()
@@ -143,16 +139,8 @@ def quantize_automodel(self) -> None:
 
         if self.config.calibration:
             LOGGER.info("\t+ Generating calibration dataset")
-            dataset_shapes = {
-                "dataset_size": 1,
-                "sequence_length": 1,
-                **self.model_shapes,
-            }
-            calibration_dataset = DatasetGenerator(
-                task=self.config.task,
-                dataset_shapes=dataset_shapes,
-                model_shapes=self.model_shapes,
-            )()
+            dataset_shapes = {"dataset_size": 1, "sequence_length": 1, **self.model_shapes}
+            calibration_dataset = DatasetGenerator(task=self.config.task, dataset_shapes=dataset_shapes, model_shapes=self.model_shapes)()
             columns_to_be_removed = list(set(calibration_dataset.column_names) - set(quantizer._export_input_names))
             calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
         else:
@@ -175,9 +163,7 @@ def quantize_automodel(self) -> None:
     def prepare_for_inference(self, **kwargs) -> None:
         if self.config.reshape:
             static_shapes = {
-                key: value
-                for key, value in kwargs.items()
-                if key in inspect.getfullargspec(self.pretrained_model.reshape).args
+                key: value for key, value in kwargs.items() if key in inspect.getfullargspec(self.pretrained_model.reshape).args
             }
             if (static_shapes.get("height", None) is not None) and ("sequence_length" in static_shapes):
                 static_shapes["sequence_length"] = kwargs.get("num_channels", 3)

diff --git a/optimum_benchmark/backends/openvino/utils.py b/optimum_benchmark/backends/openvino/utils.py
@@ -1,8 +1,4 @@
 from optimum.intel.openvino.utils import _HEAD_TO_AUTOMODELS
 
 TASKS_TO_OVMODEL = {task: f"optimum.intel.openvino.{ovmodel}" for task, ovmodel in _HEAD_TO_AUTOMODELS.items()}
-TASKS_TO_OVMODEL.update(
-    {
-        "feature-extraction": "optimum.intel.openvino.OVModelForFeatureExtraction",
-    }
-)
+TASKS_TO_OVMODEL.update({"feature-extraction": "optimum.intel.openvino.OVModelForFeatureExtraction"})
diff --git a/optimum_benchmark/backends/peft_utils.py b/optimum_benchmark/backends/peft_utils.py
@@ -3,24 +3,9 @@
 from ..import_utils import is_peft_available
 
 if is_peft_available():
-    from peft import (
-        IA3Config,
-        LoraConfig,
-        PeftConfig,
-        AdaLoraConfig,
-        PrefixTuningConfig,
-        PromptEncoderConfig,
-        PromptLearningConfig,
-    )
+    from peft import IA3Config, LoraConfig, PeftConfig, AdaLoraConfig, PrefixTuningConfig, PromptEncoderConfig, PromptLearningConfig
 
-PEFT_TASKS_TYPES = [
-    "SEQ_CLS",
-    "SEQ_2_SEQ_LM",
-    "CAUSAL_LM",
-    "TOKEN_CLS",
-    "QUESTION_ANS",
-    "FEATURE_EXTRACTION",
-]
+PEFT_TASKS_TYPES = ["SEQ_CLS", "SEQ_2_SEQ_LM", "CAUSAL_LM", "TOKEN_CLS", "QUESTION_ANS", "FEATURE_EXTRACTION"]
 
 PEFT_CONFIG = {
     "base_model_name_or_path": None,