Skip to content

Commit

Permalink
feat: Integrate mlflow upload for onnx models (#101)
Browse files Browse the repository at this point in the history
* fix: Solve segmentation analysis report slowdown issue

* feat: Add model upload param in core config, support onnx mlflow log

* build: Update version to 1.5.7

* fix: Refactor tests

* add: Print dataset split count also for segmentation

* feat: Replace core's default upload_models value with export.types

* docs: Update changelog
  • Loading branch information
AlessandroPolidori authored Feb 5, 2024
1 parent 76fa8ae commit ccd6566
Show file tree
Hide file tree
Showing 10 changed files with 114 additions and 39 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@
# Changelog
All notable changes to this project will be documented in this file.

### [1.5.7]

#### Added

- Add upload_models to core config

#### Refactored

- infer_signature_torch_model refactored to infer_signature_model

### [1.5.6]

#### Added
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "quadra"
version = "1.5.6"
version = "1.5.7"
description = "Deep Learning experiment orchestration library"
authors = [
"Federico Belotti <[email protected]>",
Expand Down
2 changes: 1 addition & 1 deletion quadra/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.5.6"
__version__ = "1.5.7"


def get_version():
Expand Down
1 change: 1 addition & 0 deletions quadra/configs/core/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ cv2_num_threads: 1
command: "python "
experiment_path: null
upload_artifacts: False
upload_models: ${export.types} # Default behavior in quadra <= 1.5.6
log_level: info
11 changes: 8 additions & 3 deletions quadra/datamodules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,13 +305,18 @@ def save_checkpoint(self) -> None:
self.data.to_csv(self.dataset_file, index=False)
log.info("Datamodule checkpoint saved to disk.")

if "targets" in self.data and not isinstance(self.data["targets"].iloc[0], np.ndarray):
# If we find a numpy array target it's very likely one hot encoded, in that case we don't want to print
if "targets" in self.data:
if isinstance(self.data["targets"].iloc[0], np.ndarray):
# If we find a numpy array target it's very likely one hot encoded,
# in that case we just print the number of train/val/test samples
grouping = ["split"]
else:
grouping = ["split", "targets"]
log.info("Dataset Info:")
split_order = {"train": 0, "val": 1, "test": 2}
log.info(
"\n%s",
self.data.groupby(["split", "targets"])
self.data.groupby(grouping)
.size()
.to_frame()
.reset_index()
Expand Down
1 change: 1 addition & 0 deletions quadra/models/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ def to(self, device: str):

def eval(self):
    """Fake interface to match torch models.

    Returns:
        The model itself, so the result of the call can be reassigned or chained.
    """
    return self

def half(self):
"""Convert model to half precision."""
Expand Down
19 changes: 9 additions & 10 deletions quadra/utils/mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,28 @@
except ImportError:
MLFLOW_AVAILABLE = False

from typing import Any, Sequence, TypeVar
from typing import Any, Sequence

import torch
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import MLFlowLogger
from torch import nn

NnModuleT = TypeVar("NnModuleT", bound=nn.Module)
from quadra.models.evaluation import BaseEvaluationModel


@torch.inference_mode()
def infer_signature_torch_model(model: NnModuleT, data: list[Any]) -> ModelSignature | None:
def infer_signature_model(model: BaseEvaluationModel, data: list[Any]) -> ModelSignature | None:
"""Infer input and output signature for a PyTorch/Torchscript model."""
model = model.eval()
model_output = model(*data)

try:
output_signature = infer_signature_input_torch(model_output)
output_signature = infer_signature_input(model_output)

if len(data) == 1:
signature_input = infer_signature_input_torch(data[0])
signature_input = infer_signature_input(data[0])
else:
signature_input = infer_signature_input_torch(data)
signature_input = infer_signature_input(data)
except ValueError:
# TODO: Solve circular import as it is not possible to import get_logger right now
# log.warning("Unable to infer signature for model output type %s", type(model_output))
Expand All @@ -39,7 +38,7 @@ def infer_signature_torch_model(model: NnModuleT, data: list[Any]) -> ModelSigna
return infer_signature(signature_input, output_signature)


def infer_signature_input_torch(input_tensor: Any) -> Any:
def infer_signature_input(input_tensor: Any) -> Any:
"""Recursively infer the signature input format to pass to mlflow.models.infer_signature.
Raises:
Expand All @@ -58,7 +57,7 @@ def infer_signature_input_torch(input_tensor: Any) -> Any:
# Nested dicts are not supported
raise ValueError("Nested dicts are not supported")

signature[f"output_{i}"] = infer_signature_input_torch(x)
signature[f"output_{i}"] = infer_signature_input(x)
elif isinstance(input_tensor, torch.Tensor):
signature = input_tensor.cpu().numpy()
elif isinstance(input_tensor, dict):
Expand All @@ -71,7 +70,7 @@ def infer_signature_input_torch(input_tensor: Any) -> Any:
# Nested signature is currently not supported by mlflow
raise ValueError("Nested sequences are not supported")

signature[k] = infer_signature_input_torch(v)
signature[k] = infer_signature_input(v)
else:
raise ValueError(f"Unable to infer signature for model output type {type(input_tensor)}")

Expand Down
80 changes: 68 additions & 12 deletions quadra/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import dotenv
import mlflow
import numpy as np
import onnx
import pytorch_lightning as pl
import rich.syntax
import rich.tree
Expand All @@ -28,7 +29,7 @@
import quadra
import quadra.utils.export as quadra_export
from quadra.callbacks.mlflow import get_mlflow_logger
from quadra.utils.mlflow import infer_signature_torch_model
from quadra.utils.mlflow import infer_signature_model

IMAGE_EXTENSIONS: List[str] = [".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".tif", ".pbm", ".pgm", ".ppm", ".pxm", ".pnm"]

Expand Down Expand Up @@ -291,20 +292,40 @@ def finish(
List[Any],
quadra_export.generate_torch_inputs(input_size, device=device, half_precision=half_precision),
)
types_to_upload = config.core.get("upload_models")
for model_path in deployed_models:
if model_path.endswith(".pt"):
model_type = model_type_from_path(model_path)
if model_type is None:
logging.warning("%s model type not supported", model_path)
continue
if model_type is not None and model_type in types_to_upload:
model = quadra_export.import_deployment_model(
model_path, device=device, inference_config=config.inference
).model

signature = infer_signature_torch_model(model, inputs)

with mlflow.start_run(run_id=mlflow_logger.run_id) as _:
mlflow.pytorch.log_model(
model,
artifact_path=model_path,
signature=signature,
)
)

if model_type in ["torchscript", "pytorch"]:
signature = infer_signature_model(model.model, inputs)
with mlflow.start_run(run_id=mlflow_logger.run_id) as _:
mlflow.pytorch.log_model(
model.model,
artifact_path=model_path,
signature=signature,
)
elif model_type in ["onnx", "simplified_onnx"]:
signature = infer_signature_model(model, inputs)
with mlflow.start_run(run_id=mlflow_logger.run_id) as _:
if model.model_path is None:
logging.warning(
"Cannot log onnx model on mlflow, \
BaseEvaluationModel 'model_path' attribute is None"
)
else:
model_proto = onnx.load(model.model_path)
mlflow.onnx.log_model(
model_proto,
artifact_path=model_path,
signature=signature,
)

if tensorboard_logger is not None:
config_paths = []
Expand All @@ -331,6 +352,41 @@ def load_envs(env_file: Optional[str] = None) -> None:
dotenv.load_dotenv(dotenv_path=env_file, override=True)


def model_type_from_path(model_path: str) -> Optional[str]:
    """Infer the type of a model from the suffix of its file path.

    The known suffixes are tested in a fixed order and the first match wins. The order matters:
    ``simplified.onnx`` has to be tested before the more generic ``.onnx``. The comparison is
    case sensitive.

    Args:
        model_path: The file path of the model.

    Returns:
        One of the following values:
            - "torchscript" if the path ends with ``.pt`` (TorchScript).
            - "pytorch" if the path ends with ``.pth`` (PyTorch).
            - "simplified_onnx" if the path ends with ``simplified.onnx`` (Simplified ONNX).
            - "onnx" if the path ends with ``.onnx`` (ONNX).
            - "json" if the path ends with ``.json`` (JSON).
            - None if the path ends with none of the suffixes above.

    Example:
        ```python
        model_path = "path/to/your/model.onnx"
        model_type = model_type_from_path(model_path)
        print(f"The model type is: {model_type}")
        ```
    """
    # Ordered (suffix, model type) pairs: more specific suffixes come before generic ones.
    known_suffixes = (
        (".pt", "torchscript"),
        (".pth", "pytorch"),
        ("simplified.onnx", "simplified_onnx"),
        (".onnx", "onnx"),
        (".json", "json"),
    )
    matches = (kind for suffix, kind in known_suffixes if model_path.endswith(suffix))
    return next(matches, None)


def setup_opencv() -> None:
"""Setup OpenCV to use only one thread and not use OpenCL."""
cv2.setNumThreads(1)
Expand Down
3 changes: 3 additions & 0 deletions quadra/utils/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def create_grid_figure(
fig_size (Tuple[int, int], optional): Figure size. Defaults to (12, 8).
bounds (Optional[List[Tuple[float, float]]], optional): Bounds for the images. Defaults to None.
"""
default_plt_backend = plt.get_backend()
plt.switch_backend("Agg")
_, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=fig_size, squeeze=False)
for i, row in enumerate(images):
for j, image in enumerate(row):
Expand All @@ -84,6 +86,7 @@ def create_grid_figure(
plt.tight_layout()
plt.savefig(file_path, bbox_inches="tight", dpi=300, facecolor="white", transparent=False)
plt.close()
plt.switch_backend(default_plt_backend)


def create_visualization_dataset(dataset: torch.utils.data.Dataset):
Expand Down
24 changes: 12 additions & 12 deletions tests/utilities/test_mlflow_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from quadra.models.base import ModelSignatureWrapper
from quadra.utils.export import generate_torch_inputs
from quadra.utils.mlflow import infer_signature_input_torch, infer_signature_torch_model
from quadra.utils.mlflow import infer_signature_input, infer_signature_model
from quadra.utils.tests.models import DoubleInputModel, SingleInputModel


Expand Down Expand Up @@ -57,7 +57,7 @@ def test_single_tensor_signature():

inputs = generate_torch_inputs(model.input_shapes, device="cpu")

signature = infer_signature_torch_model(model, inputs)
signature = infer_signature_model(model, inputs)

expected_input_signature = [TensorSpec(shape=(-1, *x.shape[1:]), type=x.numpy().dtype)]

Expand All @@ -76,7 +76,7 @@ def test_multiple_tensor_signature():
assert model.input_shapes == [(3, 224, 224), (3, 448, 448)]
inputs = generate_torch_inputs(model.input_shapes, device="cpu")

signature = infer_signature_torch_model(model, inputs)
signature = infer_signature_model(model, inputs)

expected_input_signature = [
TensorSpec(shape=(-1, *x.shape[1:]), type=x.numpy().dtype, name="output_0"),
Expand All @@ -98,7 +98,7 @@ def test_dict_signature():

inputs = generate_torch_inputs(model.input_shapes, device="cpu")

signature = infer_signature_torch_model(model, inputs)
signature = infer_signature_model(model, inputs)

expected_input_signature = [TensorSpec(shape=(-1, *x.shape[1:]), type=x.numpy().dtype, name="x")]

Expand Down Expand Up @@ -126,9 +126,9 @@ def test_nested_tuple_signature():

# Nested structures are not supported
with raises(ValueError):
infer_signature_input_torch(inputs)
infer_signature_input(inputs)

signature = infer_signature(infer_signature_input_torch(outputs), infer_signature_input_torch(outputs))
signature = infer_signature(infer_signature_input(outputs), infer_signature_input(outputs))

assert check_signature_equality(signature.outputs.inputs, expected_output_signature)

Expand All @@ -151,9 +151,9 @@ def test_nested_list_signature():

# Nested structures are not supported
with raises(ValueError):
infer_signature_input_torch(inputs)
infer_signature_input(inputs)

signature = infer_signature(infer_signature_input_torch(outputs), infer_signature_input_torch(outputs))
signature = infer_signature(infer_signature_input(outputs), infer_signature_input(outputs))

assert check_signature_equality(signature.outputs.inputs, expected_output_signature)

Expand All @@ -170,10 +170,10 @@ def test_nested_dicts_signature():

# Nested structures are not supported
with raises(ValueError):
infer_signature_input_torch(inputs)
infer_signature_input(inputs)

with raises(ValueError):
infer_signature_input_torch(outputs)
infer_signature_input(outputs)


@torch.inference_mode()
Expand All @@ -189,7 +189,7 @@ def test_tuple_of_dicts_signature():

# Nested structures are not supported
with raises(ValueError):
infer_signature_input_torch(inputs)
infer_signature_input(inputs)

with raises(ValueError):
infer_signature_input_torch(outputs)
infer_signature_input(outputs)

0 comments on commit ccd6566

Please sign in to comment.