diff --git a/Makefile b/Makefile
index eeb3612..d099ab4 100644
--- a/Makefile
+++ b/Makefile
@@ -10,11 +10,11 @@ venv:
 	@uv venv --python=python3.12
 
 install: .uv .pre-commit
-	@uv pip install -e ".[cpu,dev]"
+	@uv pip install -e ".[dev]" --no-cache-dir
 	@pre-commit install
 
 install-gpu: .uv .pre-commit
-	@uv pip install -e ".[dev,gpu]"
+	@uv pip install -e ".[dev,onnx,tensorrt,torch]" --no-cache-dir
 	@pre-commit install
 
 lint:
diff --git a/README.md b/README.md
index 7db7342..f3d7b34 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Focoos Foundational Models
+# Focoos pre-trained models
 
 | Model Name | Task | Metrics | Domain |
 | ------------------- | --------------------- | ------- | ------------------------------- |
@@ -14,50 +14,69 @@
 | focoos_isaid_nano | Semantic Segmentation | - | Satellite Imagery, 15 classes |
 | focoos_isaid_medium | Semantic Segmentation | - | Satellite Imagery, 15 classes |
 
-# Focoos SDK
+# Focoos
+Focoos is a comprehensive SDK designed for computer vision tasks such as object detection, semantic segmentation, instance segmentation, and more. It provides pre-trained models that can be easily integrated and customized by users for various applications.
+Focoos supports both cloud and local inference, and enables training on the cloud, making it a versatile tool for developers working in different domains, including autonomous driving, common scenes, drone aerial scenes, and satellite imagery.
 
-![Tests](https://github.com/FocoosAI/focoos/actions/workflows/test.yml/badge.svg??event=push&branch=main)
-
-## Requirements
-
-### CUDA 12
+### Key Features
 
-For **local inference**, ensure that you have CUDA 12 and cuDNN 9 installed, as they are required for onnxruntime version 1.20.1.
+- **Pre-trained Models**: A wide range of pre-trained models for different tasks and domains.
+- **Multiple Inference Runtimes**: Support for various inference runtimes including CPU, GPU, Torchscript CUDA, OnnxRuntime CUDA, and OnnxRuntime TensorRT.
+- **Cloud Inference**: API access to Focoos cloud inference.
+- **Local Inference**: Run models directly on your local machine for easy deployment.
+- **Cloud Training**: Train user models on the Focoos cloud.
+- **Model Monitoring**: Monitor model performance and metrics.
 
-To install cuDNN 9:
+![Tests](https://github.com/FocoosAI/focoos/actions/workflows/test.yml/badge.svg??event=push&branch=main)
+# 🐍 Setup
+We recommend using [UV](https://docs.astral.sh/uv/) as a package manager and environment manager for a streamlined dependency management experience.
+Here’s how to create a new virtual environment with UV:
 ```bash
-apt-get -y install cudnn9-cuda-12
+pip install uv
+uv venv --python 3.12
+source .venv/bin/activate
 ```
 
-### (Optional) TensorRT
+Focoos models support multiple inference runtimes.
+To keep the library lightweight, optional dependencies (e.g., torch, onnxruntime, tensorrt) are not installed by default.
+You can install the required optional dependencies using the following syntax:
 
-To perform inference using TensorRT, ensure you have TensorRT version 10.5 installed.
+## CPU-only or Remote Usage
 ```bash
-sudo apt-get install tensorrt
+uv pip install focoos git+https://github.com/FocoosAI/focoos.git
 ```
 
-# Install
+## GPU Runtimes
+### Torchscript CUDA
+```bash
+uv pip install focoos[torch] git+https://github.com/FocoosAI/focoos.git
+```
 
-Nvidia GPU:
+### OnnxRuntime CUDA
+Ensure that you have CUDA 12 and cuDNN 9 installed, as they are required for onnxruntime version 1.20.1.
 ```bash
-pip install '.[gpu]'
+apt-get -y install cudnn9-cuda-12
 ```
 
-Nvidia GPU,TensorRT:
-
 ```bash
-pip install '.[gpu,tensorrt]'
+uv pip install focoos[onnx] git+https://github.com/FocoosAI/focoos.git
 ```
 
-CPU,COREML:
+### OnnxRuntime TensorRT
+
+To perform inference using TensorRT, ensure you have TensorRT version 10.5 installed.
+```bash
+sudo apt-get install tensorrt
+```
 
 ```bash
-pip install '.[cpu]'
+uv pip install focoos[tensorrt] git+https://github.com/FocoosAI/focoos.git
 ```
+
 ## 🤖 Cloud Inference
 
 ```python
@@ -74,7 +93,7 @@ detections = model.infer("./image.jpg", threshold=0.4)
 setup FOCOOS_API_KEY_GRADIO environment variable with your Focoos API key
 
 ```bash
-pip install '.[gradio]'
+uv pip install focoos[gradio] git+https://github.com/FocoosAI/focoos.git
 ```
 
 ```bash
diff --git a/focoos/__init__.py b/focoos/__init__.py
index 5665b00..570d8c1 100644
--- a/focoos/__init__.py
+++ b/focoos/__init__.py
@@ -18,14 +18,14 @@
     ModelMetadata,
     ModelPreview,
     ModelStatus,
-    OnnxEngineOpts,
+    OnnxRuntimeOpts,
     RuntimeTypes,
     SystemInfo,
     TrainingInfo,
     TrainInstance,
 )
 from .remote_model import RemoteModel
-from .runtime import ONNXRuntime, get_runtime
+from .runtime import ONNXRuntime, load_runtime
 from .utils.logger import get_logger
 from .utils.system import get_system_info
 from .utils.vision import (
@@ -57,14 +57,14 @@
     "Hyperparameters",
     "LatencyMetrics",
     "ModelPreview",
-    "OnnxEngineOpts",
+    "OnnxRuntimeOpts",
     "RuntimeTypes",
     "SystemInfo",
     "TrainingInfo",
     "TrainInstance",
     "get_system_info",
     "ONNXRuntime",
-    "get_runtime",
+    "load_runtime",
     "DEV_API_URL",
     "LOCAL_API_URL",
     "PROD_API_URL",
diff --git a/focoos/focoos.py b/focoos/focoos.py
index 1bda7b0..6278598 100644
--- a/focoos/focoos.py
+++ b/focoos/focoos.py
@@ -22,6 +22,7 @@
 from focoos.local_model import LocalModel
 from focoos.ports import (
     DatasetMetadata,
+    ModelFormat,
     ModelMetadata,
     ModelNotFound,
     ModelPreview,
@@ -164,7 +165,7 @@ def list_focoos_models(self) -> list[ModelPreview]:
     def get_local_model(
         self,
         model_ref: str,
-        runtime_type: Optional[RuntimeTypes] = None,
+        runtime_type: Optional[RuntimeTypes] = RuntimeTypes.ONNX_CUDA32,
     ) -> LocalModel:
         """
         Retrieves a local model for the specified reference.
@@ -187,8 +188,12 @@
         """
         runtime_type = runtime_type or FOCOOS_CONFIG.runtime_type
         model_dir = os.path.join(self.cache_dir, model_ref)
-        if not os.path.exists(os.path.join(model_dir, "model.onnx")):
-            self._download_model(model_ref)
+        format = ModelFormat.TORCHSCRIPT if runtime_type == RuntimeTypes.TORCHSCRIPT_32 else ModelFormat.ONNX
+        if not os.path.exists(os.path.join(model_dir, f"model.{format.value}")):
+            self._download_model(
+                model_ref,
+                format=format,
+            )
         return LocalModel(model_dir, runtime_type)
 
     def get_remote_model(self, model_ref: str) -> RemoteModel:
@@ -249,7 +254,7 @@ def list_shared_datasets(self) -> list[DatasetMetadata]:
             raise ValueError(f"Failed to list datasets: {res.status_code} {res.text}")
         return [DatasetMetadata.from_json(dataset) for dataset in res.json()]
 
-    def _download_model(self, model_ref: str) -> str:
+    def _download_model(self, model_ref: str, format: ModelFormat = ModelFormat.ONNX) -> str:
         """
         Downloads a model from the Focoos API.
 
@@ -263,14 +268,14 @@
             ValueError: If the API request fails or the download fails.
""" model_dir = os.path.join(self.cache_dir, model_ref) - model_path = os.path.join(model_dir, "model.onnx") + model_path = os.path.join(model_dir, f"model.{format.value}") metadata_path = os.path.join(model_dir, "focoos_metadata.json") if os.path.exists(model_path) and os.path.exists(metadata_path): logger.info("📥 Model already downloaded") return model_path ## download model metadata - res = self.http_client.get(f"models/{model_ref}/download?format=onnx") + res = self.http_client.get(f"models/{model_ref}/download?format={format.value}") if res.status_code != 200: logger.error(f"Failed to download model: {res.status_code} {res.text}") raise ValueError(f"Failed to download model: {res.status_code} {res.text}") diff --git a/focoos/local_model.py b/focoos/local_model.py index 0a37fbe..9770843 100644 --- a/focoos/local_model.py +++ b/focoos/local_model.py @@ -32,10 +32,11 @@ FocoosDetections, FocoosTask, LatencyMetrics, + ModelFormat, ModelMetadata, RuntimeTypes, ) -from focoos.runtime import ONNXRuntime, get_runtime +from focoos.runtime import BaseRuntime, load_runtime from focoos.utils.logger import get_logger from focoos.utils.vision import ( image_preprocess, @@ -82,20 +83,32 @@ def __init__( and initializes the runtime for inference using the provided runtime type. Annotation utilities are also prepared for visualizing model outputs. """ + # Determine runtime type and model format runtime_type = runtime_type or FOCOOS_CONFIG.runtime_type + model_format = ModelFormat.TORCHSCRIPT if runtime_type == RuntimeTypes.TORCHSCRIPT_32 else ModelFormat.ONNX - logger.debug(f"Runtime type: {runtime_type}, Loading model from {model_dir},") - if not os.path.exists(model_dir): - raise FileNotFoundError(f"Model directory not found: {model_dir}") + # Set model directory and path self.model_dir: Union[str, Path] = model_dir + self.model_path = os.path.join(model_dir, f"model.{model_format.value}") + logger.debug(f"Runtime type: {runtime_type}, Loading model from {self.model_path}..") + + # Check if model path exists + if not os.path.exists(self.model_path): + raise FileNotFoundError(f"Model path not found: {self.model_path}") + + # Load metadata and set model reference self.metadata: ModelMetadata = self._read_metadata() self.model_ref = self.metadata.ref + + # Initialize annotation utilities self.label_annotator = sv.LabelAnnotator(text_padding=10, border_radius=10) self.box_annotator = sv.BoxAnnotator() self.mask_annotator = sv.MaskAnnotator() - self.runtime: ONNXRuntime = get_runtime( + + # Load runtime for inference + self.runtime: BaseRuntime = load_runtime( runtime_type, - str(os.path.join(model_dir, "model.onnx")), + str(self.model_path), self.metadata, FOCOOS_CONFIG.warmup_iter, ) diff --git a/focoos/ports.py b/focoos/ports.py index dcd54e4..ceb08b7 100644 --- a/focoos/ports.py +++ b/focoos/ports.py @@ -211,7 +211,7 @@ class FocoosDetections(FocoosBaseModel): @dataclass -class OnnxEngineOpts: +class OnnxRuntimeOpts: fp16: Optional[bool] = False cuda: Optional[bool] = False vino: Optional[bool] = False @@ -221,6 +221,13 @@ class OnnxEngineOpts: warmup_iter: int = 0 +@dataclass +class TorchscriptRuntimeOpts: + warmup_iter: int = 0 + optimize_for_inference: bool = True + set_fusion_strategy: bool = True + + @dataclass class LatencyMetrics: fps: int @@ -239,6 +246,12 @@ class RuntimeTypes(str, Enum): ONNX_TRT16 = "onnx_trt16" ONNX_CPU = "onnx_cpu" ONNX_COREML = "onnx_coreml" + TORCHSCRIPT_32 = "torchscript_32" + + +class ModelFormat(str, Enum): + ONNX = "onnx" + TORCHSCRIPT = "pt" class 
GPUInfo(FocoosBaseModel): @@ -266,6 +279,7 @@ class SystemInfo(FocoosBaseModel): gpu_cuda_version: Optional[str] = None gpus_info: Optional[list[GPUInfo]] = None packages_versions: Optional[dict[str, str]] = None + environment: Optional[dict[str, str]] = None def pretty_print(self): print("================ SYSTEM INFO ====================") @@ -286,6 +300,10 @@ def pretty_print(self): print(f"{key}:") for pkg_name, pkg_version in value.items(): print(f" - {pkg_name}: {pkg_version}") + elif isinstance(value, dict) and key == "environment": # Special formatting for environment + print(f"{key}:") + for env_key, env_value in value.items(): + print(f" - {env_key}: {env_value}") else: print(f"{key}: {value}") print("================================================") diff --git a/focoos/runtime.py b/focoos/runtime.py index 87e8182..bfdb678 100644 --- a/focoos/runtime.py +++ b/focoos/runtime.py @@ -15,26 +15,45 @@ ONNXRuntime: A class that interfaces with ONNX Runtime for model inference. """ +from abc import abstractmethod from pathlib import Path from time import perf_counter -from typing import List, Tuple +from typing import Any, List, Tuple import numpy as np -import onnxruntime as ort + +try: + import torch + + TORCH_AVAILABLE = True +except ImportError as e: + print(e) + TORCH_AVAILABLE = False + +try: + import onnxruntime as ort + + ORT_AVAILABLE = True +except ImportError: + ORT_AVAILABLE = False + import supervision as sv from focoos.ports import ( FocoosTask, LatencyMetrics, ModelMetadata, - OnnxEngineOpts, + OnnxRuntimeOpts, RuntimeTypes, + TorchscriptRuntimeOpts, ) from focoos.utils.logger import get_logger from focoos.utils.system import get_cpu_name, get_gpu_name GPU_ID = 0 +logger = get_logger() + def det_postprocess(out: List[np.ndarray], im0_shape: Tuple[int, int], conf_threshold: float) -> sv.Detections: """ @@ -68,7 +87,6 @@ def semseg_postprocess(out: List[np.ndarray], im0_shape: Tuple[int, int], conf_t Args: out (List[np.ndarray]): The output of the semantic segmentation model. - im0_shape (Tuple[int, int]): The original shape of the input image (height, width). conf_threshold (float): The confidence threshold for filtering detections. Returns: @@ -89,239 +107,211 @@ def semseg_postprocess(out: List[np.ndarray], im0_shape: Tuple[int, int], conf_t ) -class ONNXRuntime: +class BaseRuntime: + def __init__(self, model_path: str, opts: Any, model_metadata: ModelMetadata): + pass + + @abstractmethod + def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: + pass + + @abstractmethod + def benchmark(self, iterations=20, size=640) -> LatencyMetrics: + pass + + +class ONNXRuntime(BaseRuntime): """ - A class that interfaces with ONNX Runtime for model inference using different execution providers - (CUDA, TensorRT, OpenVINO, CoreML, etc.). It manages preprocessing, inference, and postprocessing - of data, as well as benchmarking the performance of the model. - - Attributes: - logger (Logger): Logger for the ONNXRuntime instance. - name (str): The name of the model (derived from its path). - opts (OnnxEngineOpts): Options used for configuring the ONNX Runtime. - model_metadata (ModelMetadata): Metadata related to the model. - postprocess_fn (Callable): The function used to postprocess the model's output. - ort_sess (InferenceSession): The ONNXRuntime inference session. - dtype (np.dtype): The data type for the model input. - binding (Optional[str]): The binding type for the runtime (e.g., CUDA, CPU). 
+ ONNX Runtime wrapper for model inference with different execution providers. + Handles preprocessing, inference, postprocessing and benchmarking. """ - def __init__(self, model_path: str, opts: OnnxEngineOpts, model_metadata: ModelMetadata): - """ - Initializes the ONNXRuntime instance with the specified model and configuration options. - - Args: - model_path (str): Path to the ONNX model file. - opts (OnnxEngineOpts): The configuration options for ONNX Runtime. - model_metadata (ModelMetadata): Metadata for the model (e.g., task type). - """ + def __init__(self, model_path: str, opts: OnnxRuntimeOpts, model_metadata: ModelMetadata): self.logger = get_logger() - self.logger.debug(f"[onnxruntime device] {ort.get_device()}") - self.logger.debug(f"[onnxruntime available providers] {ort.get_available_providers()}") + + self.logger.debug(f"🔧 [onnxruntime device] {ort.get_device()}") + self.logger.debug(f"🔧 [onnxruntime available providers] {ort.get_available_providers()}") + self.name = Path(model_path).stem self.opts = opts self.model_metadata = model_metadata self.postprocess_fn = det_postprocess if model_metadata.task == FocoosTask.DETECTION else semseg_postprocess + + # Setup session options options = ort.SessionOptions() - if opts.verbose: - options.log_severity_level = 0 + options.log_severity_level = 0 if opts.verbose else 2 options.enable_profiling = opts.verbose - # options.intra_op_num_threads = 1 - available_providers = ort.get_available_providers() - if opts.cuda and "CUDAExecutionProvider" not in available_providers: - self.logger.warning("CUDA ExecutionProvider not found.") - if opts.trt and "TensorrtExecutionProvider" not in available_providers: - self.logger.warning("Tensorrt ExecutionProvider not found.") - if opts.vino and "OpenVINOExecutionProvider" not in available_providers: - self.logger.warning("OpenVINO ExecutionProvider not found.") - if opts.coreml and "CoreMLExecutionProvider" not in available_providers: - self.logger.warning("CoreML ExecutionProvider not found.") - # Set providers - providers = [] - dtype = np.float32 - binding = None - if opts.trt and "TensorrtExecutionProvider" in available_providers: - providers.append( - ( - "TensorrtExecutionProvider", - { - "device_id": 0, - # 'trt_max_workspace_size': 1073741824, # 1 GB - "trt_fp16_enable": opts.fp16, - "trt_force_sequential_engine_build": False, - }, - ) - ) - dtype = np.float32 - elif opts.vino and "OpenVINOExecutionProvider" in available_providers: - providers.append( - ( - "OpenVINOExecutionProvider", - { - "device_type": "MYRIAD_FP16", - "enable_vpu_fast_compile": True, - "num_of_threads": 1, - }, - # 'use_compiled_network': False} - ) - ) - options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL - dtype = np.float32 - binding = None - elif opts.cuda and "CUDAExecutionProvider" in available_providers: - binding = "cuda" - options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL - providers.append( - ( - "CUDAExecutionProvider", - { - "device_id": GPU_ID, - "arena_extend_strategy": "kSameAsRequested", - "gpu_mem_limit": 16 * 1024 * 1024 * 1024, - "cudnn_conv_algo_search": "EXHAUSTIVE", - "do_copy_in_default_stream": True, - }, - ) - ) - elif opts.coreml and "CoreMLExecutionProvider" in available_providers: - # # options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL - providers.append("CoreMLExecutionProvider") - else: - binding = None - - binding = None # TODO: remove this - providers.append("CPUExecutionProvider") - self.dtype = dtype - 
self.binding = binding + + # Setup providers + providers = self._setup_providers() + + # Create session self.ort_sess = ort.InferenceSession(model_path, options, providers=providers) self.active_providers = self.ort_sess.get_providers() - self.logger.info(f"[onnxruntime] Active providers:{self.ort_sess.get_providers()}") - if self.ort_sess.get_inputs()[0].type == "tensor(uint8)": - self.dtype = np.uint8 - else: - self.dtype = np.float32 + self.logger.info(f"[onnxruntime] Active providers:{self.active_providers}") + + # Set input type + self.dtype = np.uint8 if self.ort_sess.get_inputs()[0].type == "tensor(uint8)" else np.float32 + + # Warmup if self.opts.warmup_iter > 0: - self.logger.info("⏱️ [onnxruntime] Warming up model ..") - for _ in range(self.opts.warmup_iter): - np_image = np.random.rand(1, 3, 640, 640).astype(self.dtype) - input_name = self.ort_sess.get_inputs()[0].name - out_name = [output.name for output in self.ort_sess.get_outputs()] - if self.binding is not None: - io_binding = self.ort_sess.io_binding() - io_binding.bind_input( - input_name, - self.binding, - device_id=GPU_ID, - element_type=self.dtype, - shape=np_image.shape, - buffer_ptr=np_image.ctypes.data, - ) - io_binding.bind_cpu_input(input_name, np_image) - io_binding.bind_output(out_name[0], self.binding) - self.ort_sess.run_with_iobinding(io_binding) - io_binding.copy_outputs_to_cpu() - else: - self.ort_sess.run(out_name, {input_name: np_image}) - - self.logger.info(f"⏱️ [onnxruntime] {self.name} WARMUP DONE") + self._warmup() - def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: - """ - Runs inference on the provided input image and returns the model's detections. + def _setup_providers(self): + providers = [] + available = ort.get_available_providers() + + # Check and add providers in order of preference + provider_configs = [ + ( + "TensorrtExecutionProvider", + self.opts.trt, + {"device_id": 0, "trt_fp16_enable": self.opts.fp16, "trt_force_sequential_engine_build": False}, + ), + ( + "OpenVINOExecutionProvider", + self.opts.vino, + {"device_type": "MYRIAD_FP16", "enable_vpu_fast_compile": True, "num_of_threads": 1}, + ), + ( + "CUDAExecutionProvider", + self.opts.cuda, + { + "device_id": GPU_ID, + "arena_extend_strategy": "kSameAsRequested", + "gpu_mem_limit": 16 * 1024 * 1024 * 1024, + "cudnn_conv_algo_search": "EXHAUSTIVE", + "do_copy_in_default_stream": True, + }, + ), + ("CoreMLExecutionProvider", self.opts.coreml, {}), + ] + + for provider, enabled, config in provider_configs: + if enabled and provider in available: + providers.append((provider, config)) + elif enabled: + self.logger.warning(f"{provider} not found.") - Args: - im (np.ndarray): The preprocessed input image. - conf_threshold (float): The confidence threshold for filtering results. + providers.append("CPUExecutionProvider") + return providers - Returns: - sv.Detections: A sv.Detections object containing the model's output detections. 
- """ - out_name = None + def _warmup(self): + self.logger.info("⏱️ [onnxruntime] Warming up model ..") + np_image = np.random.rand(1, 3, 640, 640).astype(self.dtype) input_name = self.ort_sess.get_inputs()[0].name out_name = [output.name for output in self.ort_sess.get_outputs()] - if self.binding is not None: - self.logger.info(f"binding {self.binding}") - io_binding = self.ort_sess.io_binding() - - io_binding.bind_input( - input_name, - self.binding, - device_id=GPU_ID, - element_type=self.dtype, - shape=im.shape, - buffer_ptr=im.ctypes.data, - ) - - io_binding.bind_cpu_input(input_name, im) - io_binding.bind_output(out_name[0], self.binding) - self.ort_sess.run_with_iobinding(io_binding) - out = io_binding.copy_outputs_to_cpu() - else: - out = self.ort_sess.run(out_name, {input_name: im}) - - detections = self.postprocess_fn(out, (im.shape[2], im.shape[3]), conf_threshold) - return detections - def benchmark(self, iterations=20, size=640) -> LatencyMetrics: - """ - Benchmarks the model by running multiple inference iterations and measuring the latency. + for _ in range(self.opts.warmup_iter): + self.ort_sess.run(out_name, {input_name: np_image}) + + self.logger.info("⏱️ [onnxruntime] Warmup done") - Args: - iterations (int, optional): Number of iterations to run for benchmarking. Defaults to 20. - size (int, optional): The input image size for benchmarking. Defaults to 640. + def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: + """Run inference and return detections.""" + input_name = self.ort_sess.get_inputs()[0].name + out_name = [output.name for output in self.ort_sess.get_outputs()] + out = self.ort_sess.run(out_name, {input_name: im}) + return self.postprocess_fn(out=out, im0_shape=(im.shape[2], im.shape[3]), conf_threshold=conf_threshold) - Returns: - LatencyMetrics: The latency metrics (e.g., FPS, mean, min, max, and standard deviation). 
- """ + def benchmark(self, iterations=20, size=640) -> LatencyMetrics: + """Benchmark model latency.""" self.logger.info("⏱️ [onnxruntime] Benchmarking latency..") size = size if isinstance(size, (tuple, list)) else (size, size) - durations = [] np_input = (255 * np.random.random((1, 3, size[0], size[1]))).astype(self.dtype) input_name = self.ort_sess.get_inputs()[0].name - out_name = self.ort_sess.get_outputs()[0].name - if self.binding: - io_binding = self.ort_sess.io_binding() - - io_binding.bind_input( - input_name, - "cuda", - device_id=0, - element_type=self.dtype, - shape=np_input.shape, - buffer_ptr=np_input.ctypes.data, - ) - - io_binding.bind_cpu_input(input_name, np_input) - io_binding.bind_output(out_name, "cuda") - else: - out_name = [output.name for output in self.ort_sess.get_outputs()] + out_name = [output.name for output in self.ort_sess.get_outputs()] + durations = [] for step in range(iterations + 5): - if self.binding: - start = perf_counter() - self.ort_sess.run_with_iobinding(io_binding) - end = perf_counter() - else: - start = perf_counter() - self.ort_sess.run(out_name, {input_name: np_input}) - end = perf_counter() + start = perf_counter() + self.ort_sess.run(out_name, {input_name: np_input}) + end = perf_counter() - if step >= 5: + if step >= 5: # Skip first 5 iterations durations.append((end - start) * 1000) + durations = np.array(durations) provider = self.active_providers[0] - if provider in ["CUDAExecutionProvider", "TensorrtExecutionProvider"]: - device = get_gpu_name() - else: - device = get_cpu_name() + device = ( + get_gpu_name() if provider in ["CUDAExecutionProvider", "TensorrtExecutionProvider"] else get_cpu_name() + ) + metrics = LatencyMetrics( fps=int(1000 / durations.mean()), engine=f"onnx.{provider}", - mean=round(durations.mean(), 3), - max=round(durations.max(), 3), - min=round(durations.min(), 3), - std=round(durations.std(), 3), + mean=round(durations.mean().astype(float), 3), + max=round(durations.max().astype(float), 3), + min=round(durations.min().astype(float), 3), + std=round(durations.std().astype(float), 3), + im_size=size[0], + device=str(device), + ) + self.logger.info(f"🔥 FPS: {metrics.fps}") + return metrics + + +class TorchscriptRuntime(BaseRuntime): + def __init__( + self, + model_path: str, + opts: TorchscriptRuntimeOpts, + model_metadata: ModelMetadata, + ): + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.logger = get_logger(name="TorchscriptEngine") + self.logger.info(f"🔧 [torchscript] Device: {self.device}") + self.opts = opts + self.postprocess_fn = det_postprocess if model_metadata.task == FocoosTask.DETECTION else semseg_postprocess + + map_location = None if torch.cuda.is_available() else "cpu" + + self.model = torch.jit.load(model_path, map_location=map_location) + self.model = self.model.to(self.device) + + if self.opts.warmup_iter > 0: + self.logger.info("⏱️ [torchscript] Warming up model..") + with torch.no_grad(): + np_image = torch.rand(1, 3, 640, 640, device=self.device) + for _ in range(self.opts.warmup_iter): + self.model(np_image) + self.logger.info("⏱️ [torchscript] WARMUP DONE") + + def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: + """Run inference and return detections.""" + with torch.no_grad(): + torch_image = torch.from_numpy(im).to(self.device, dtype=torch.float32) + res = self.model(torch_image) + return self.postprocess_fn([r.cpu().numpy() for r in res], (im.shape[2], im.shape[3]), conf_threshold) + + def benchmark(self, iterations=20, 
size=640) -> LatencyMetrics: + """Benchmark model latency.""" + self.logger.info("⏱️ [torchscript] Benchmarking latency..") + size = size if isinstance(size, (tuple, list)) else (size, size) + + torch_input = torch.rand(1, 3, size[0], size[1], device=self.device) + durations = [] + + with torch.no_grad(): + for step in range(iterations + 5): + start = perf_counter() + self.model(torch_input) + end = perf_counter() + + if step >= 5: # Skip first 5 iterations + durations.append((end - start) * 1000) + + durations = np.array(durations) + device = get_gpu_name() if torch.cuda.is_available() else get_cpu_name() + + metrics = LatencyMetrics( + fps=int(1000 / durations.mean().astype(float)), + engine="torchscript", + mean=round(durations.mean().astype(float), 3), + max=round(durations.max().astype(float), 3), + min=round(durations.min().astype(float), 3), + std=round(durations.std().astype(float), 3), im_size=size[0], device=str(device), ) @@ -329,31 +319,50 @@ def benchmark(self, iterations=20, size=640) -> LatencyMetrics: return metrics -def get_runtime( +def load_runtime( runtime_type: RuntimeTypes, model_path: str, model_metadata: ModelMetadata, warmup_iter: int = 0, -) -> ONNXRuntime: +) -> BaseRuntime: """ - Creates and returns an ONNXRuntime instance based on the specified runtime type - and model path, with options for various execution providers (CUDA, TensorRT, CPU, etc.). + Creates and returns a runtime instance based on the specified runtime type. + Supports both ONNX and TorchScript runtimes with various execution providers. Args: - runtime_type (RuntimeTypes): The type of runtime to use (e.g., ONNX_CUDA32, ONNX_TRT32). - model_path (str): The path to the ONNX model. - model_metadata (ModelMetadata): Metadata describing the model. - warmup_iter (int, optional): Number of warmup iterations before benchmarking. Defaults to 0. + runtime_type (RuntimeTypes): The type of runtime to use. Can be one of: + - ONNX_CUDA32: ONNX runtime with CUDA FP32 + - ONNX_TRT32: ONNX runtime with TensorRT FP32 + - ONNX_TRT16: ONNX runtime with TensorRT FP16 + - ONNX_CPU: ONNX runtime with CPU + - ONNX_COREML: ONNX runtime with CoreML + - TORCHSCRIPT_32: TorchScript runtime with FP32 + model_path (str): Path to the model file (.onnx or .pt) + model_metadata (ModelMetadata): Model metadata containing task type, classes etc. + warmup_iter (int, optional): Number of warmup iterations before inference. Defaults to 0. Returns: - ONNXRuntime: A fully configured ONNXRuntime instance. 
+ BaseRuntime: A configured runtime instance (ONNXRuntime or TorchscriptRuntime) + + Raises: + ImportError: If required dependencies (torch/onnxruntime) are not installed """ - opts = OnnxEngineOpts( - cuda=runtime_type == RuntimeTypes.ONNX_CUDA32, - trt=runtime_type in [RuntimeTypes.ONNX_TRT32, RuntimeTypes.ONNX_TRT16], - fp16=runtime_type == RuntimeTypes.ONNX_TRT16, - warmup_iter=warmup_iter, - coreml=runtime_type == RuntimeTypes.ONNX_COREML, - verbose=False, - ) + if runtime_type == RuntimeTypes.TORCHSCRIPT_32: + if not TORCH_AVAILABLE: + logger.error("⚠️ Pytorch not found =( please install focoos with ['torch'] extra") + raise ImportError("Pytorch not found") + opts = TorchscriptRuntimeOpts(warmup_iter=warmup_iter) + return TorchscriptRuntime(model_path, opts, model_metadata) + else: + if not ORT_AVAILABLE: + logger.error("⚠️ onnxruntime not found =( please install focoos with ['onnx'] extra") + raise ImportError("onnxruntime not found") + opts = OnnxRuntimeOpts( + cuda=runtime_type == RuntimeTypes.ONNX_CUDA32, + trt=runtime_type in [RuntimeTypes.ONNX_TRT32, RuntimeTypes.ONNX_TRT16], + fp16=runtime_type == RuntimeTypes.ONNX_TRT16, + warmup_iter=warmup_iter, + coreml=runtime_type == RuntimeTypes.ONNX_COREML, + verbose=False, + ) return ONNXRuntime(model_path, opts, model_metadata) diff --git a/focoos/utils/system.py b/focoos/utils/system.py index 6c35a64..0bce5f8 100644 --- a/focoos/utils/system.py +++ b/focoos/utils/system.py @@ -1,4 +1,5 @@ import importlib.metadata as metadata +import os import platform import subprocess from typing import Optional @@ -189,22 +190,23 @@ def get_cpu_name() -> Optional[str]: def get_system_info() -> SystemInfo: """ - Gather and return comprehensive system information. + Collect and return detailed system information. - This function collects various system metrics including CPU, memory, disk, - and GPU details, as well as installed package versions. It returns this - information encapsulated in a SystemInfo object. + This function gathers a wide range of system metrics, including CPU, memory, + disk, and GPU details, as well as versions of installed packages. The collected + information is encapsulated in a SystemInfo object. 
Returns: - SystemInfo: An object containing detailed information about the system's - hardware and software configuration, including: + SystemInfo: An object containing comprehensive details about the system's + hardware and software configuration, such as: - System and node name - - CPU type and core count + - CPU type and number of cores - Available ONNXRuntime providers - Memory and disk usage statistics - - GPU count, driver, and CUDA version - - Detailed GPU information if available + - Number of GPUs, driver, and CUDA version + - Detailed information for each GPU, if available - Versions of key installed packages + - Environment variables related to the system """ system_info = platform.uname() memory_info = psutil.virtual_memory() @@ -239,6 +241,10 @@ def get_system_info() -> SystemInfo: "pillow", "supervision", "pydantic", + "torch", + "torchvision", + "nvidia-cuda-runtime-cu12", + "tensorrt", ] versions = {} for package in packages: @@ -247,6 +253,18 @@ def get_system_info() -> SystemInfo: except metadata.PackageNotFoundError: versions[package] = "unknown" + environments_var = [ + "LD_LIBRARY_PATH", + "LD_PRELOAD", + "CUDA_HOME", + "CUDA_VISIBLE_DEVICES", + "FOCOOS_LOG_LEVEL", + "DEFAULT_HOST_URL", + ] + environments = {} + for var in environments_var: + environments[var] = os.getenv(var, "") + return SystemInfo( focoos_host=FOCOOS_CONFIG.default_host_url, system=system_info.system, @@ -263,4 +281,5 @@ def get_system_info() -> SystemInfo: gpu_cuda_version=get_cuda_version(), gpus_info=gpus_info, packages_versions=versions, + environment=environments, ) diff --git a/notebooks/playground.ipynb b/notebooks/playground.ipynb index b353968..d47719e 100644 --- a/notebooks/playground.ipynb +++ b/notebooks/playground.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Setup" + "# 🐍 Setup Focoos" ] }, { @@ -13,14 +13,21 @@ "metadata": {}, "outputs": [], "source": [ - "%uv pip install -e ..[dev,gpu]" + "%uv pip install -e ..[onnx,torch,tensorrt] # you can choose to install only the inference execution providers you need" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# 🤖 Focoos Foundational Models\n" + "# 🤖 Playground with Focoos pre-trained models\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Supported Tasks: Object Detection, Instance Segmentation, Semantic Segmentation" ] }, { @@ -32,9 +39,9 @@ "import os\n", "from pprint import pprint\n", "\n", - "from focoos import DEV_API_URL, Focoos\n", + "from focoos import Focoos\n", "\n", - "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"))\n", "\n", "pprint(focoos.list_focoos_models())" ] @@ -43,7 +50,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Cloud Inference" + "## Remote Inference\n", + "This section demonstrates how to perform remote inference using a model from the Focoos platform.\n", + "We will load a remote model (can be a pre-trained model or a custom user model), and then run inference on a sample image with focoos API.\n" ] }, { @@ -57,17 +66,17 @@ "\n", "from supervision import plot_image\n", "\n", - "from focoos import DEV_API_URL, Focoos\n", + "from focoos import Focoos\n", "\n", "model_ref = \"focoos_object365\"\n", "image_path = \"./assets/ade_val_034.jpg\"\n", "\n", - "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"))\n", "\n", "model = 
focoos.get_remote_model(model_ref)\n", "## Only admin can deploy foundational models\n", "\n", - "output, preview = model.infer(image_path, threshold=0.4, annotate=True)\n", + "output, preview = model.infer(image_path, threshold=0.6, annotate=True)\n", "plot_image(preview)" ] }, @@ -85,7 +94,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Available Runtime Types" + "### 🔧 Available Runtimes and Execution Providers\n", + "\n", + "#### To install the extras modules, use the command: `uv pip install .[{{extra-name}}]`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- **TORCHSCRIPT** *`[torch]`*\n", + " - **CUDA FP32**\n", + " NVIDIA GPU acceleration with FP32 precision, excellent balance between speed and accuracy, suggested for segmentation tasks\n", + "- **ONNXRUNTIME** providers:\n", + " - **CPU** *`[cpu]`*\n", + "\n", + " Standard CPU execution, compatible with all systems but with limited performance\n", + " - **CUDA FP32** *`[gpu]`*\n", + "\n", + " NVIDIA GPU acceleration with FP32 precision, excellent balance between speed and accuracy\n", + " - **TensorRT FP16** *`[tensorrt]`*\n", + "\n", + " Maximum optimization for NVIDIA GPUs with reduced FP16 precision, exceptional performance, but slow model warmup due to tensorrt engine compilation\n", + " - **CoreML** \n", + " *`[cpu] extras`*\n", + " Optimized for Apple Silicon devices, leverages Neural Engine for hardware acceleration\n", + "\n", + "\n" ] }, { @@ -104,7 +139,27 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### OnnxRuntime With CUDA (focoos_object365)" + "### 🖥️ System info\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from focoos.utils.system import get_system_info\n", + "\n", + "system_info = get_system_info()\n", + "system_info.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inference with TorchscriptRuntime (CUDA32)" ] }, { @@ -118,19 +173,19 @@ "\n", "from supervision import plot_image\n", "\n", - "from focoos import DEV_API_URL, Focoos\n", + "from focoos import Focoos, RuntimeTypes\n", "\n", - "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"))\n", "image_path = \"./assets/ade_val_034.jpg\"\n", "model_ref = \"focoos_object365\"\n", "\n", "\n", - "model = focoos.get_local_model(model_ref)\n", + "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.TORCHSCRIPT_32)\n", "\n", "latency = model.benchmark(iterations=10, size=640)\n", "pprint(latency)\n", "# pprint(latency)\n", - "output, preview = model.infer(image_path, threshold=0.3, annotate=True)\n", + "output, preview = model.infer(image_path, threshold=0.6, annotate=True)\n", "pprint(output.detections)\n", "pprint(output.latency)\n", "\n", @@ -141,7 +196,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### OnnxRuntime With TensorRT (FP16) (focoos_object365)" + "### Inference with OnnxRuntime (CUDA32) (focoos_object365)" ] }, { @@ -155,22 +210,19 @@ "\n", "from supervision import plot_image\n", "\n", - "from focoos import Focoos\n", - "from focoos.ports import RuntimeTypes\n", + "from focoos import DEV_API_URL, Focoos, RuntimeTypes\n", "\n", - "focoos = Focoos(\n", - " api_key=os.getenv(\"FOCOOS_API_KEY\"),\n", - ")\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "image_path = \"./assets/ade_val_034.jpg\"\n", "model_ref = 
\"focoos_object365\"\n", "\n", "\n", - "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.ONNX_TRT16)\n", + "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.ONNX_CUDA32)\n", "\n", "latency = model.benchmark(iterations=10, size=640)\n", "pprint(latency)\n", "# pprint(latency)\n", - "output, preview = model.infer(image_path, threshold=0.3, annotate=True)\n", + "output, preview = model.infer(image_path, threshold=0.6, annotate=True)\n", "pprint(output.detections)\n", "pprint(output.latency)\n", "\n", @@ -181,14 +233,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# User Models" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List User Models" + "### Inference with OnnxRuntime (TensorRT) (FP16)" ] }, { @@ -200,23 +245,42 @@ "import os\n", "from pprint import pprint\n", "\n", - "from dotenv import load_dotenv\n", + "from supervision import plot_image\n", "\n", - "from focoos import DEV_API_URL, Focoos\n", + "from focoos import Focoos\n", + "from focoos.ports import RuntimeTypes\n", "\n", - "load_dotenv()\n", + "focoos = Focoos(\n", + " api_key=os.getenv(\"FOCOOS_API_KEY\"),\n", + ")\n", + "image_path = \"./assets/ade_val_034.jpg\"\n", + "model_ref = \"focoos_object365\"\n", "\n", - "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "models = focoos.list_models()\n", - "pprint(models)" + "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.ONNX_TRT16)\n", + "\n", + "latency = model.benchmark(iterations=10, size=640)\n", + "pprint(latency)\n", + "# pprint(latency)\n", + "output, preview = model.infer(image_path, threshold=0.6, annotate=True)\n", + "pprint(output.detections)\n", + "pprint(output.latency)\n", + "\n", + "plot_image(preview)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Create Model" + "# User Models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List User Models" ] }, { @@ -236,15 +300,15 @@ "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "model = focoos.new_model(name=\"test-model\", focoos_model=\"focoos_object365\", description=\"Test model\")\n", - "### Get Model Info" + "models = focoos.list_models()\n", + "pprint(models)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Deploy user model on shared cloud endpoint" + "## Create Model" ] }, { @@ -264,8 +328,8 @@ "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "model = focoos.get_remote_model(os.getenv(\"FOCOOS_MODEL_REF\"))\n", - "model_info = model.get_info()" + "model = focoos.new_model(name=\"test-model\", focoos_model=\"focoos_object365\", description=\"Test model\")\n", + "### Get Model Info" ] }, { @@ -520,13 +584,6 @@ "metrics_visualizer.log_metrics()\n", "metrics_visualizer.notebook_plot_training_metrics()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/pyproject.toml b/pyproject.toml index b5bb6d2..19159f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ convention = "google" [tool.setuptools.packages.find] include = ["focoos**"] + [project] name = "focoos" version = "0.8.0" @@ -31,7 +32,7 @@ readme = "README.md" requires-python = ">=3.10" dependencies = [ "requests", - "Pillow~=10.4.0", + "Pillow~=10.2.0", "supervision~=0.25.1", "opencv-python~=4.11.0", "pydantic~=2.10.5", @@ -56,9 +57,10 @@ keywords = [ ] 
[project.optional-dependencies] -cpu = ["onnxruntime==1.20.1"] -gpu = ["onnxruntime-gpu==1.20.1","nvidia-cuda-runtime-cu12==12.4.127"] -tensorrt = ["tensorrt==10.5.0"] +default = ["onnxruntime==1.20.1"] +onnx = ["onnxruntime-gpu==1.20.1"] +tensorrt = ["onnxruntime-gpu==1.20.1","tensorrt==10.5.0"] +torch = ["torch==2.3.0","torchvision"] dev = [ "pytest", "pytest-cov", diff --git a/tests/test_local_model.py b/tests/test_local_model.py index 141856b..4a89a08 100644 --- a/tests/test_local_model.py +++ b/tests/test_local_model.py @@ -14,7 +14,7 @@ ModelMetadata, RuntimeTypes, ) -from focoos.runtime import ONNXRuntime +from focoos.runtime import ONNXRuntime, TorchscriptRuntime @pytest.fixture @@ -28,11 +28,12 @@ def mock_model_dir(tmp_path, mock_metadata: ModelMetadata): @pytest.fixture -def mock_local_model(mocker: MockerFixture, mock_model_dir, image_ndarray): +def mock_local_model_onnx(mocker: MockerFixture, mock_model_dir, image_ndarray): # Mock get_runtime mock_runtime = MagicMock(spec=ONNXRuntime) - mock_get_runtime = mocker.patch("focoos.local_model.get_runtime", mock_runtime) + mock_get_runtime = mocker.patch("focoos.local_model.load_runtime", mock_runtime) mock_get_runtime.return_value = mock_runtime + mocker.patch("focoos.local_model.os.path.exists", return_value=True) model = LocalModel(model_dir=mock_model_dir, runtime_type=RuntimeTypes.ONNX_CPU) # Mock BoxAnnotator @@ -54,26 +55,66 @@ def mock_local_model(mocker: MockerFixture, mock_model_dir, image_ndarray): return model +@pytest.fixture +def mock_local_model_torch(mocker: MockerFixture, mock_model_dir, image_ndarray): + # Mock get_runtime + mock_runtime = MagicMock(spec=TorchscriptRuntime) + mock_get_runtime = mocker.patch("focoos.local_model.load_runtime", mock_runtime) + mock_get_runtime.return_value = mock_runtime + mocker.patch("focoos.local_model.os.path.exists", return_value=True) + model = LocalModel(model_dir=mock_model_dir, runtime_type=RuntimeTypes.TORCHSCRIPT_32) + + # Mock BoxAnnotator + mock_box_annotator = mocker.patch("focoos.local_model.sv.BoxAnnotator", autospec=True) + mock_box_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) + + # Mock LabelAnnotator + mock_label_annotator = mocker.patch("focoos.local_model.sv.LabelAnnotator", autospec=True) + mock_label_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) + + # Mock MaskAnnotator + mock_mask_annotator = mocker.patch("focoos.local_model.sv.MaskAnnotator", autospec=True) + mock_mask_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) + + # Inject mock annotators into the local model + model.box_annotator = mock_box_annotator + model.label_annotator = mock_label_annotator + model.mask_annotator = mock_mask_annotator + return model + + def test_initialization_fail_no_model_dir(): with pytest.raises(FileNotFoundError): LocalModel(model_dir="fakedir", runtime_type=RuntimeTypes.ONNX_CPU) -def test_initialization(mock_local_model: LocalModel, mock_model_dir, mock_metadata): - assert mock_local_model.model_dir == mock_model_dir - assert mock_local_model.metadata == mock_metadata - assert isinstance(mock_local_model.runtime, ONNXRuntime) +def test_init_file_not_found(mocker: MockerFixture): + mocker.patch("focoos.local_model.os.path.exists", return_value=False) + with pytest.raises(FileNotFoundError): + LocalModel(model_dir="fakedir", runtime_type=RuntimeTypes.ONNX_CPU) + + +def test_initialization_onnx(mock_local_model_onnx: LocalModel, mock_model_dir, mock_metadata): + assert 
mock_local_model_onnx.model_dir == mock_model_dir + assert mock_local_model_onnx.metadata == mock_metadata + assert isinstance(mock_local_model_onnx.runtime, ONNXRuntime) + + +def test_initialization_torch(mock_local_model_torch: LocalModel, mock_model_dir, mock_metadata): + assert mock_local_model_torch.model_dir == mock_model_dir + assert mock_local_model_torch.metadata == mock_metadata + assert isinstance(mock_local_model_torch.runtime, TorchscriptRuntime) -def test_benchmark(mock_local_model: LocalModel): - mock_local_model.runtime.benchmark.return_value = MagicMock(spec=LatencyMetrics) +def test_benchmark(mock_local_model_onnx: LocalModel): + mock_local_model_onnx.runtime.benchmark.return_value = MagicMock(spec=LatencyMetrics) iterations, size = 10, 1000 - result = mock_local_model.benchmark(iterations, size) + result = mock_local_model_onnx.benchmark(iterations, size) assert result is not None assert isinstance(result, LatencyMetrics) - mock_local_model.runtime.benchmark.assert_called_once_with(iterations, size) + mock_local_model_onnx.runtime.benchmark.assert_called_once_with(iterations, size) @pytest.fixture @@ -99,34 +140,34 @@ def mock_sv_detections() -> sv.Detections: def test_annotate_detection_metadata_classes_none( - image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections + image_ndarray: np.ndarray, mock_local_model_onnx: LocalModel, mock_sv_detections ): - mock_local_model.metadata.classes = None - annotated_im = mock_local_model._annotate(image_ndarray, mock_sv_detections) + mock_local_model_onnx.metadata.classes = None + annotated_im = mock_local_model_onnx._annotate(image_ndarray, mock_sv_detections) assert annotated_im is not None assert isinstance(annotated_im, np.ndarray) - mock_local_model.box_annotator.annotate.assert_called_once() - mock_local_model.label_annotator.annotate.assert_called_once() - mock_local_model.mask_annotator.annotate.assert_not_called() + mock_local_model_onnx.box_annotator.annotate.assert_called_once() + mock_local_model_onnx.label_annotator.annotate.assert_called_once() + mock_local_model_onnx.mask_annotator.annotate.assert_not_called() -def test_annotate_detection(image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections): - annotated_im = mock_local_model._annotate(image_ndarray, mock_sv_detections) +def test_annotate_detection(image_ndarray: np.ndarray, mock_local_model_onnx: LocalModel, mock_sv_detections): + annotated_im = mock_local_model_onnx._annotate(image_ndarray, mock_sv_detections) assert annotated_im is not None assert isinstance(annotated_im, np.ndarray) - mock_local_model.box_annotator.annotate.assert_called_once() - mock_local_model.label_annotator.annotate.assert_called_once() - mock_local_model.mask_annotator.annotate.assert_not_called() + mock_local_model_onnx.box_annotator.annotate.assert_called_once() + mock_local_model_onnx.label_annotator.annotate.assert_called_once() + mock_local_model_onnx.mask_annotator.annotate.assert_not_called() -def test_annotate_semseg(image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections): - mock_local_model.metadata.task = FocoosTask.SEMSEG - annotated_im = mock_local_model._annotate(image_ndarray, mock_sv_detections) +def test_annotate_semseg(image_ndarray: np.ndarray, mock_local_model_onnx: LocalModel, mock_sv_detections): + mock_local_model_onnx.metadata.task = FocoosTask.SEMSEG + annotated_im = mock_local_model_onnx._annotate(image_ndarray, mock_sv_detections) assert annotated_im is not None assert isinstance(annotated_im, 
np.ndarray) - mock_local_model.box_annotator.annotate.asser_not_called() - mock_local_model.label_annotator.annotate.asser_not_called() - mock_local_model.mask_annotator.annotate.assert_called_once() + mock_local_model_onnx.box_annotator.annotate.asser_not_called() + mock_local_model_onnx.label_annotator.annotate.asser_not_called() + mock_local_model_onnx.mask_annotator.annotate.assert_called_once() def mock_infer_setup( @@ -177,7 +218,7 @@ def __call__(self, *args, **kwargs): @pytest.mark.parametrize("annotate", [(False, None)]) def test_infer_( mocker, - mock_local_model, + mock_local_model_onnx, image_ndarray, mock_sv_detections, mock_focoos_detections, @@ -186,7 +227,7 @@ def test_infer_( # Arrange *mock_to_call_once, mock_annotate = mock_infer_setup( mocker, - mock_local_model, + mock_local_model_onnx, image_ndarray, mock_sv_detections, mock_focoos_detections, @@ -194,7 +235,7 @@ def test_infer_( ) # Act - out, im = mock_local_model.infer(image=image_ndarray, annotate=annotate) + out, im = mock_local_model_onnx.infer(image=image_ndarray, annotate=annotate) # Assertions assert out is not None diff --git a/tests/test_runtime.py b/tests/test_runtime.py index bf8f1cf..6568015 100644 --- a/tests/test_runtime.py +++ b/tests/test_runtime.py @@ -5,8 +5,8 @@ import pytest from pytest_mock import MockerFixture -from focoos.ports import ModelMetadata, OnnxEngineOpts, RuntimeTypes -from focoos.runtime import ONNXRuntime, det_postprocess, get_runtime, semseg_postprocess +from focoos.ports import ModelMetadata, OnnxRuntimeOpts, RuntimeTypes, TorchscriptRuntimeOpts +from focoos.runtime import ONNXRuntime, TorchscriptRuntime, det_postprocess, load_runtime, semseg_postprocess def test_det_post_process(): @@ -78,7 +78,7 @@ def test_semseg_postprocess(): [ ( RuntimeTypes.ONNX_CUDA32, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=True, trt=False, fp16=False, @@ -89,7 +89,7 @@ def test_semseg_postprocess(): ), ( RuntimeTypes.ONNX_TRT32, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=False, trt=True, fp16=False, @@ -100,7 +100,7 @@ def test_semseg_postprocess(): ), ( RuntimeTypes.ONNX_TRT16, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=False, trt=True, fp16=True, @@ -111,7 +111,7 @@ def test_semseg_postprocess(): ), ( RuntimeTypes.ONNX_CPU, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=False, trt=False, fp16=False, @@ -122,7 +122,7 @@ def test_semseg_postprocess(): ), ( RuntimeTypes.ONNX_COREML, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=False, trt=False, fp16=False, @@ -131,6 +131,14 @@ def test_semseg_postprocess(): warmup_iter=2, ), ), + ( + RuntimeTypes.TORCHSCRIPT_32, + TorchscriptRuntimeOpts( + warmup_iter=2, + optimize_for_inference=True, + set_fusion_strategy=True, + ), + ), ], ) def test_get_run_time(mocker: MockerFixture, tmp_path, runtime_type, expected_opts): @@ -144,14 +152,18 @@ def test_get_run_time(mocker: MockerFixture, tmp_path, runtime_type, expected_op mock_model_metadata = MagicMock(spec=ModelMetadata) # mock opts - mock_onnxruntime_class = mocker.patch("focoos.runtime.ONNXRuntime", autospec=True) - mock_onnxruntime_class.return_value = MagicMock(spec=ONNXRuntime, opts=expected_opts) + if runtime_type == RuntimeTypes.TORCHSCRIPT_32: + mock_runtime_class = mocker.patch("focoos.runtime.TorchscriptRuntime", autospec=True) + mock_runtime_class.return_value = MagicMock(spec=TorchscriptRuntime, opts=expected_opts) + else: + mock_runtime_class = mocker.patch("focoos.runtime.ONNXRuntime", autospec=True) + mock_runtime_class.return_value = MagicMock(spec=ONNXRuntime, opts=expected_opts) # warmup_iter 
warmup_iter = 2 # call the function to test - onnx_runtime = get_runtime( + runtime = load_runtime( runtime_type=runtime_type, model_path=model_path, model_metadata=mock_model_metadata, @@ -159,8 +171,8 @@ def test_get_run_time(mocker: MockerFixture, tmp_path, runtime_type, expected_op ) # assertions - assert onnx_runtime is not None - mock_onnxruntime_class.assert_called_once_with( + assert runtime is not None + mock_runtime_class.assert_called_once_with( model_path, expected_opts, mock_model_metadata,