Commit 349019b: style

IlyasMoutawwakil committed Feb 5, 2024
1 parent c26e3c0
Showing 34 changed files with 239 additions and 651 deletions.
@@ -1,4 +1,4 @@
-name: Quality Code Checks
+name: Quality Check

on:
workflow_dispatch:
@@ -12,7 +12,7 @@ concurrency:
cancel-in-progress: true

jobs:
-run_code_quality_checks:
+run_quality_checks:
runs-on: ubuntu-latest
steps:
- name: Checkout
@@ -29,4 +29,4 @@ jobs:
pip install -e .[quality]
- name: Check style
-run: make style_check
+run: make quality
3 changes: 2 additions & 1 deletion .gitignore
@@ -169,4 +169,5 @@ version.txt

actions-runner/
experiments/
-.engine/
+examples/
+.engine/
45 changes: 14 additions & 31 deletions Makefile
@@ -1,36 +1,19 @@
-# Copyright 2021 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# List of targets that are not associated with files
+.PHONY: style_check style test install install_dev_cpu install_dev_gpu

-SHELL := /bin/bash
-CURRENT_DIR = $(shell pwd)
-DEFAULT_CLONE_URL := https://github.com/huggingface/optimum-benchmark.git
-# If CLONE_URL is empty, revert to DEFAULT_CLONE_URL
-REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))

-# Install the library in development mode
-.PHONY: style_check style test

-# Run code quality checks
-style_check:
-	black --check .
+quality:
+	ruff check .
+	ruff format --check .

-# Format the code
style:
-	black .
-	ruff --fix .
+	ruff format .
+	ruff check --fix .

install:
	pip install -e .

+install_dev_cpu:
+	pip install -e .[quality,testing,openvino,onnxruntime,neural-compressor,diffusers,timm,peft]

-# Run tests for the library
-test:
-	python -m pytest tests
+install_dev_gpu:
+	pip install -e .[quality,testing,onnxruntime-gpu,deepspeed,diffusers,timm,peft]
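This Makefile change is the source of the reformatting in the Python files below: black is dropped in favor of ruff for both linting (`ruff check`) and formatting (`ruff format`). A toy before/after sketch; the longer line length is an assumption inferred from the collapsed call sites, not stated in the commit:

```python
def get_transformers_pretrained_config(model, **hub_kwargs):
    # Stub standing in for the real helper, just to make the example runnable.
    return {"model": model, **hub_kwargs}

# black at its default 88 columns keeps this call split across lines:
config = get_transformers_pretrained_config(
    "gpt2", revision="main", trust_remote_code=False
)

# ruff format at a longer configured line length collapses it onto one line:
config = get_transformers_pretrained_config("gpt2", revision="main", trust_remote_code=False)
```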
3 changes: 2 additions & 1 deletion README.md
@@ -48,12 +48,13 @@ git clone https://github.com/huggingface/optimum-benchmark.git && python -m pip

Depending on the backends you want to use, you might need to install some extra dependencies:

- Pytorch (default): `pip install optimum-benchmark`
- OpenVINO: `pip install optimum-benchmark[openvino]`
- Torch-ORT: `pip install optimum-benchmark[torch-ort]`
- OnnxRuntime: `pip install optimum-benchmark[onnxruntime]`
- TensorRT-LLM: `pip install optimum-benchmark[tensorrt-llm]`
- OnnxRuntime-GPU: `pip install optimum-benchmark[onnxruntime-gpu]`
- Intel Neural Compressor: `pip install optimum-benchmark[neural-compressor]`
- OnnxRuntime-Training: `pip install optimum-benchmark[onnxruntime-training]`
- Text Generation Inference: `pip install optimum-benchmark[text-generation-inference]`

### Running a benchmark 🏃
18 changes: 5 additions & 13 deletions optimum_benchmark/backends/base.py
@@ -22,7 +22,7 @@
AutoModel,
)

-from .config import BackendConfigT, BackendConfig
+from .config import BackendConfigT
from ..task_utils import get_automodel_class_for_task
from .diffusers_utils import (
extract_diffusers_shapes_from_config,
@@ -64,25 +64,17 @@ def __init__(self, config: BackendConfigT):
if self.config.library == "diffusers":
self.pretrained_processor = None
self.pretrained_generation_config = None
-self.pretrained_config = get_diffusers_pretrained_config(
-model=self.config.model, **self.config.hub_kwargs
-)
-self.model_shapes = extract_diffusers_shapes_from_config(
-model=self.config.model, **self.config.hub_kwargs
-)
+self.pretrained_config = get_diffusers_pretrained_config(model=self.config.model, **self.config.hub_kwargs)
+self.model_shapes = extract_diffusers_shapes_from_config(model=self.config.model, **self.config.hub_kwargs)
self.model_type = self.config.task
elif self.config.library == "timm":
self.pretrained_processor = get_timm_pretrained_processor(self.config.model)
self.pretrained_config = get_timm_pretrained_config(self.config.model)
-self.model_shapes = extract_timm_shapes_from_config(
-config=self.pretrained_config
-)
+self.model_shapes = extract_timm_shapes_from_config(config=self.pretrained_config)
self.model_type = self.pretrained_config.architecture
self.pretrained_generation_config = None
else:
-self.pretrained_config = get_transformers_pretrained_config(
-self.config.model, **self.config.hub_kwargs
-)
+self.pretrained_config = get_transformers_pretrained_config(self.config.model, **self.config.hub_kwargs)
self.pretrained_generation_config = get_transformers_generation_config(
self.config.model, **self.config.hub_kwargs
)
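For context, a minimal sketch of what the transformers branch above resolves to, assuming the `get_transformers_*` helpers are thin wrappers around the standard `transformers` loading APIs (the model id and fallback handling are illustrative):

```python
from transformers import AutoConfig, GenerationConfig

model_id = "gpt2"  # illustrative
pretrained_config = AutoConfig.from_pretrained(model_id)
try:
    pretrained_generation_config = GenerationConfig.from_pretrained(model_id)
except OSError:
    # Models that ship no generation_config.json fall back to config defaults.
    pretrained_generation_config = GenerationConfig.from_model_config(pretrained_config)
model_type = pretrained_config.model_type
```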
29 changes: 7 additions & 22 deletions optimum_benchmark/backends/neural_compressor/backend.py
@@ -56,15 +56,11 @@ def __init__(self, config: INCConfig):

def validate_task(self) -> None:
if self.config.task not in TASKS_TO_INCMODELS:
-raise NotImplementedError(
-f"INCBackend does not support task {self.config.task}"
-)
+raise NotImplementedError(f"INCBackend does not support task {self.config.task}")

def load_automodel_from_pretrained(self) -> None:
LOGGER.info("\t+ Loading AutoModel from pretrained")
-self.pretrained_model = self.automodel_class.from_pretrained(
-self.config.model, **self.config.hub_kwargs
-)
+self.pretrained_model = self.automodel_class.from_pretrained(self.config.model, **self.config.hub_kwargs)

def load_automodel_with_no_weights(self) -> None:
no_weights_model = os.path.join(self.tmpdir.name, "no_weights")
@@ -91,9 +87,7 @@ def load_automodel_with_no_weights(self) -> None:

def load_incmodel_from_pretrained(self) -> None:
LOGGER.info("\t+ Loading INCModel from pretrained")
-self.pretrained_model = self.incmodel_class.from_pretrained(
-self.config.model, **self.config.hub_kwargs
-)
+self.pretrained_model = self.incmodel_class.from_pretrained(self.config.model, **self.config.hub_kwargs)

def load_incmodel_with_no_weights(self) -> None:
no_weights_model = os.path.join(self.tmpdir.name, "no_weights")
@@ -117,9 +111,7 @@ def quantize_automodel(self) -> None:
ptq_quantization_config["accuracy_criterion"] = AccuracyCriterion(
**ptq_quantization_config["accuracy_criterion"]
)
ptq_quantization_config["tuning_criterion"] = TuningCriterion(
**ptq_quantization_config["tuning_criterion"]
)
ptq_quantization_config["tuning_criterion"] = TuningCriterion(**ptq_quantization_config["tuning_criterion"])
ptq_quantization_config = PostTrainingQuantConfig(**ptq_quantization_config)
LOGGER.info("\t+ Creating quantizer")
quantizer = INCQuantizer.from_pretrained(
Expand All @@ -138,16 +130,9 @@ def quantize_automodel(self) -> None:
"sequence_length": 1,
**self.model_shapes,
}
-calibration_dataset = DatasetGenerator(
-task=self.config.task, dataset_shapes=dataset_shapes
-).generate()
-columns_to_be_removed = list(
-set(calibration_dataset.column_names)
-- set(quantizer._signature_columns)
-)
-calibration_dataset = calibration_dataset.remove_columns(
-columns_to_be_removed
-)
+calibration_dataset = DatasetGenerator(task=self.config.task, dataset_shapes=dataset_shapes).generate()
+columns_to_be_removed = list(set(calibration_dataset.column_names) - set(quantizer._signature_columns))
+calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)
else:
calibration_dataset = None

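As a reference point, the objects assembled in `quantize_automodel` come from neural-compressor's public config API. A hedged sketch assuming neural-compressor 2.x; the criterion values are illustrative, not the project's defaults:

```python
from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion

ptq_quantization_config = {
    "accuracy_criterion": {"criterion": "relative", "tolerable_loss": 0.01},
    "tuning_criterion": {"max_trials": 10},
}
# Same dict-to-dataclass conversion as in the backend above.
ptq_quantization_config["accuracy_criterion"] = AccuracyCriterion(**ptq_quantization_config["accuracy_criterion"])
ptq_quantization_config["tuning_criterion"] = TuningCriterion(**ptq_quantization_config["tuning_criterion"])
ptq_quantization_config = PostTrainingQuantConfig(**ptq_quantization_config)
```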
9 changes: 2 additions & 7 deletions optimum_benchmark/backends/neural_compressor/config.py
@@ -72,10 +72,5 @@ def __post_init__(self):
self.ptq_quantization_config = OmegaConf.to_object(
OmegaConf.merge(PTQ_QUANTIZATION_CONFIG, self.ptq_quantization_config)
)
-if (
-self.ptq_quantization_config["approach"] == "static"
-and not self.calibration
-):
-raise ValueError(
-"Calibration must be enabled when using static quantization."
-)
+if self.ptq_quantization_config["approach"] == "static" and not self.calibration:
+raise ValueError("Calibration must be enabled when using static quantization.")
76 changes: 19 additions & 57 deletions optimum_benchmark/backends/onnxruntime/backend.py
@@ -59,9 +59,7 @@ def __init__(self, config: ORTConfig) -> None:
self.ortmodel_class = get_class(TASKS_TO_ORTMODELS[self.config.task])
LOGGER.info(f"Using ORTModel class {self.ortmodel_class.__name__}")
else:
-raise NotImplementedError(
-f"ORTBackend does not support task {self.config.task}"
-)
+raise NotImplementedError(f"ORTBackend does not support task {self.config.task}")

self.set_session_options()
self.tmpdir = TemporaryDirectory()
@@ -94,20 +92,15 @@ def __init__(self, config: ORTConfig) -> None:

def validate_task(self) -> None:
if self.config.task not in {**TASKS_TO_ORTMODELS, **TASKS_TO_ORTSD}:
-raise NotImplementedError(
-f"ORTBackend does not support task {self.config.task}"
-)
+raise NotImplementedError(f"ORTBackend does not support task {self.config.task}")

def validate_provider(self) -> None:
assert (
self.pretrained_model.providers[0] == self.config.provider
), f"{self.config.provider} is not first in providers list: {self.pretrained_model.providers}"

def is_deferred_trt_loading(self) -> bool:
-return (
-self.config.provider == "TensorrtExecutionProvider"
-and self.config.task in TEXT_GENERATION_TASKS
-)
+return self.config.provider == "TensorrtExecutionProvider" and self.config.task in TEXT_GENERATION_TASKS

def set_session_options(self) -> None:
self.session_options = SessionOptions()
@@ -175,20 +168,15 @@ def ortmodel_kwargs(self) -> Dict[str, Any]:

@property
def onnx_files_names(self):
-assert os.path.isdir(
-self.config.model
-), f"{self.config.model} is not a directory"
+assert os.path.isdir(self.config.model), f"{self.config.model} is not a directory"
if self.config.use_merged:
return [
model
for model in os.listdir(self.config.model)
-if model not in [ONNX_DECODER_NAME, ONNX_DECODER_WITH_PAST_NAME]
-and model.endswith(".onnx")
+if model not in [ONNX_DECODER_NAME, ONNX_DECODER_WITH_PAST_NAME] and model.endswith(".onnx")
]
else:
-return [
-file for file in os.listdir(self.config.model) if file.endswith(".onnx")
-]
+return [file for file in os.listdir(self.config.model) if file.endswith(".onnx")]

@property
def inputs_names(self) -> List[str]:
@@ -215,9 +203,7 @@ def optimize_onnx_files(self) -> None:
**self.config.optimization_config,
)
LOGGER.info("\t+ Creating optimizer")
-optimizer = ORTOptimizer.from_pretrained(
-self.config.model, file_names=self.onnx_files_names
-)
+optimizer = ORTOptimizer.from_pretrained(self.config.model, file_names=self.onnx_files_names)
LOGGER.info("\t+ Optimizing ORTModel")
optimizer.optimize(
optimization_config,
@@ -248,17 +234,11 @@ def quantize_onnx_files(self) -> None:

LOGGER.info("\t+ Processing quantization config")
if self.config.auto_quantization is not None:
-auto_quantization_config = format_quantization_config(
-self.config.auto_quantization_config
-)
-auto_quantization_class = getattr(
-AutoQuantizationConfig, self.config.auto_quantization
-)
+auto_quantization_config = format_quantization_config(self.config.auto_quantization_config)
+auto_quantization_class = getattr(AutoQuantizationConfig, self.config.auto_quantization)
quantization_config = auto_quantization_class(**auto_quantization_config)
elif self.config.quantization:
-quantization_config = format_quantization_config(
-self.config.quantization_config
-)
+quantization_config = format_quantization_config(self.config.quantization_config)
quantization_config = QuantizationConfig(**quantization_config)

if self.is_calibrated:
@@ -268,31 +248,21 @@ def quantize_onnx_files(self) -> None:
"sequence_length": 1,
**self.model_shapes,
}
-calibration_dataset = DatasetGenerator(
-task=self.config.task, dataset_shapes=dataset_shapes
-).generate()
-columns_to_be_removed = list(
-set(calibration_dataset.column_names) - set(self.inputs_names)
-)
-calibration_dataset = calibration_dataset.remove_columns(
-columns_to_be_removed
-)
+calibration_dataset = DatasetGenerator(task=self.config.task, dataset_shapes=dataset_shapes).generate()
+columns_to_be_removed = list(set(calibration_dataset.column_names) - set(self.inputs_names))
+calibration_dataset = calibration_dataset.remove_columns(columns_to_be_removed)

LOGGER.info("\t+ Processing calibration config")
if self.config.auto_calibration is not None:
LOGGER.info("\t+ Processing calibration config")
-auto_calibration_method = getattr(
-AutoCalibrationConfig, self.config.auto_calibration
-)
+auto_calibration_method = getattr(AutoCalibrationConfig, self.config.auto_calibration)
calibration_config = auto_calibration_method(
calibration_dataset,
**self.config.auto_calibration_config,
)
elif self.config.calibration:
LOGGER.info("\t+ Processing calibration config")
calibration_config = format_calibration_config(
self.config.calibration_config
)
calibration_config = format_calibration_config(self.config.calibration_config)
calibration_config = CalibrationConfig(
dataset_name="calibration_dataset",
dataset_split=calibration_dataset.split,
@@ -303,9 +273,7 @@ def quantize_onnx_files(self) -> None:

for onnx_file_name in self.onnx_files_names:
LOGGER.info(f"\t+ Creating quantizer for {onnx_file_name}")
-quantizer = ORTQuantizer.from_pretrained(
-self.config.model, file_name=onnx_file_name
-)
+quantizer = ORTQuantizer.from_pretrained(self.config.model, file_name=onnx_file_name)

if self.is_calibrated:
LOGGER.info("\t+ Fitting calibration tensors range")
@@ -343,9 +311,7 @@ def quantize_onnx_files(self) -> None:

def prepare_for_inference(self, **kwargs) -> None:
if self.is_deferred_trt_loading():
-LOGGER.info(
-"\t+ Creating dynamic shapes for Tensorrt engine. Engine creation might take a while."
-)
+LOGGER.info("\t+ Creating dynamic shapes for Tensorrt engine. Engine creation might take a while.")
batch_size = kwargs["batch_size"]
max_new_tokens = kwargs["max_new_tokens"]
sequence_length = kwargs["sequence_length"]
@@ -399,12 +365,8 @@ def train(
from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments

LOGGER.info("\t+ Setting dataset format to `torch`")
-training_dataset.set_format(
-type="torch", columns=list(training_dataset.features.keys())
-)
-LOGGER.info(
-"\t+ Wrapping training arguments with optimum.onnxruntime.ORTTrainingArguments"
-)
+training_dataset.set_format(type="torch", columns=list(training_dataset.features.keys()))
+LOGGER.info("\t+ Wrapping training arguments with optimum.onnxruntime.ORTTrainingArguments")
training_arguments = ORTTrainingArguments(**training_arguments)
LOGGER.info("\t+ Wrapping model with optimum.onnxruntime.ORTTrainer")
trainer = ORTTrainer(
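For reference, the per-file quantizer loop above drives the standard `optimum.onnxruntime` workflow. A hedged end-to-end sketch for the simpler dynamic case; the paths and the AVX512-VNNI choice are illustrative:

```python
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Dynamic quantization needs no calibration dataset or fitted tensor ranges.
quantization_config = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)

quantizer = ORTQuantizer.from_pretrained("path/to/onnx_model_dir", file_name="model.onnx")
quantizer.quantize(save_dir="path/to/quantized_model", quantization_config=quantization_config)
```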