diff --git a/Makefile b/Makefile
index eeb3612..d099ab4 100644
--- a/Makefile
+++ b/Makefile
@@ -10,11 +10,11 @@ venv:
 	@uv venv --python=python3.12
 
 install: .uv .pre-commit
-	@uv pip install -e ".[cpu,dev]"
+	@uv pip install -e ".[dev]" --no-cache-dir
 	@pre-commit install
 
 install-gpu: .uv .pre-commit
-	@uv pip install -e ".[dev,gpu]"
+	@uv pip install -e ".[dev,onnx,tensorrt,torch]" --no-cache-dir
 	@pre-commit install
 
 lint:
diff --git a/README.md b/README.md
index 7db7342..f3d7b34 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Focoos Foundational Models
+# Focoos pre-trained models
 
 | Model Name | Task | Metrics | Domain |
 | ------------------- | --------------------- | ------- | ------------------------------- |
@@ -14,50 +14,69 @@
 | focoos_isaid_nano | Semantic Segmentation | - | Satellite Imagery, 15 classes |
 | focoos_isaid_medium | Semantic Segmentation | - | Satellite Imagery, 15 classes |
 
-# Focoos SDK
+# Focoos
+Focoos is a comprehensive SDK designed for computer vision tasks such as object detection, semantic segmentation, instance segmentation, and more. It provides pre-trained models that can be easily integrated and customized by users for various applications.
+Focoos supports both cloud and local inference, and enables training on the cloud, making it a versatile tool for developers working in different domains, including autonomous driving, common scenes, drone aerial scenes, and satellite imagery.
 
-![Tests](https://github.com/FocoosAI/focoos/actions/workflows/test.yml/badge.svg??event=push&branch=main)
-
-## Requirements
-
-### CUDA 12
+### Key Features
 
-For **local inference**, ensure that you have CUDA 12 and cuDNN 9 installed, as they are required for onnxruntime version 1.20.1.
+- **Pre-trained Models**: A wide range of pre-trained models for different tasks and domains.
+- **Multiple Inference Runtimes**: Support for various inference runtimes including CPU, GPU, Torchscript CUDA, OnnxRuntime CUDA, and OnnxRuntime TensorRT.
+- **Cloud Inference**: API access to Focoos cloud inference.
+- **Local Inference**: Run models directly on your local machine for easy deployment.
+- **Cloud Training**: Train user models on the Focoos cloud.
+- **Model Monitoring**: Monitor model performance and metrics.
 
-To install cuDNN 9:
+![Tests](https://github.com/FocoosAI/focoos/actions/workflows/test.yml/badge.svg??event=push&branch=main)
+# 🐍 Setup
+We recommend using [UV](https://docs.astral.sh/uv/) as a package manager and environment manager for a streamlined dependency management experience.
+Here’s how to create a new virtual environment with UV:
 ```bash
-apt-get -y install cudnn9-cuda-12
+pip install uv
+uv venv --python 3.12
+source .venv/bin/activate
 ```
 
-### (Optional) TensorRT
+Focoos models support multiple inference runtimes.
+To keep the library lightweight, optional dependencies (e.g., torch, onnxruntime, tensorrt) are not installed by default.
+You can install the required optional dependencies using the following syntax:
 
-To perform inference using TensorRT, ensure you have TensorRT version 10.5 installed.
+## CPU-only or Remote Usage
 ```bash
-sudo apt-get install tensorrt
+uv pip install focoos git+https://github.com/FocoosAI/focoos.git
 ```
 
-# Install
+## GPU Runtimes
+### Torchscript CUDA
+```bash
+uv pip install focoos[torch] git+https://github.com/FocoosAI/focoos.git
+```
 
-Nvidia GPU:
+### OnnxRuntime CUDA
+Ensure that you have CUDA 12 and cuDNN 9 installed, as they are required for onnxruntime version 1.20.1.
 ```bash
-pip install '.[gpu]'
+apt-get -y install cudnn9-cuda-12
 ```
 
-Nvidia GPU,TensorRT:
-
 ```bash
-pip install '.[gpu,tensorrt]'
+uv pip install focoos[onnx] git+https://github.com/FocoosAI/focoos.git
 ```
 
-CPU,COREML:
+### OnnxRuntime TensorRT
+
+To perform inference using TensorRT, ensure you have TensorRT version 10.5 installed.
+```bash
+sudo apt-get install tensorrt
+```
 
 ```bash
-pip install '.[cpu]'
+uv pip install focoos[tensorrt] git+https://github.com/FocoosAI/focoos.git
 ```
+
 ## 🤖 Cloud Inference
 
 ```python
@@ -74,7 +93,7 @@ detections = model.infer("./image.jpg", threshold=0.4)
 setup FOCOOS_API_KEY_GRADIO environment variable with your Focoos API key
 
 ```bash
-pip install '.[gradio]'
+uv pip install focoos[gradio] git+https://github.com/FocoosAI/focoos.git
 ```
 
 ```bash
diff --git a/focoos/__init__.py b/focoos/__init__.py
index 5665b00..570d8c1 100644
--- a/focoos/__init__.py
+++ b/focoos/__init__.py
@@ -18,14 +18,14 @@
     ModelMetadata,
     ModelPreview,
     ModelStatus,
-    OnnxEngineOpts,
+    OnnxRuntimeOpts,
     RuntimeTypes,
     SystemInfo,
     TrainingInfo,
     TrainInstance,
 )
 from .remote_model import RemoteModel
-from .runtime import ONNXRuntime, get_runtime
+from .runtime import ONNXRuntime, load_runtime
 from .utils.logger import get_logger
 from .utils.system import get_system_info
 from .utils.vision import (
@@ -57,14 +57,14 @@
     "Hyperparameters",
     "LatencyMetrics",
     "ModelPreview",
-    "OnnxEngineOpts",
+    "OnnxRuntimeOpts",
     "RuntimeTypes",
     "SystemInfo",
     "TrainingInfo",
     "TrainInstance",
     "get_system_info",
     "ONNXRuntime",
-    "get_runtime",
+    "load_runtime",
     "DEV_API_URL",
     "LOCAL_API_URL",
     "PROD_API_URL",
diff --git a/focoos/focoos.py b/focoos/focoos.py
index 1bda7b0..6278598 100644
--- a/focoos/focoos.py
+++ b/focoos/focoos.py
@@ -22,6 +22,7 @@
 from focoos.local_model import LocalModel
 from focoos.ports import (
     DatasetMetadata,
+    ModelFormat,
     ModelMetadata,
     ModelNotFound,
     ModelPreview,
@@ -164,7 +165,7 @@ def list_focoos_models(self) -> list[ModelPreview]:
     def get_local_model(
         self,
         model_ref: str,
-        runtime_type: Optional[RuntimeTypes] = None,
+        runtime_type: Optional[RuntimeTypes] = RuntimeTypes.ONNX_CUDA32,
     ) -> LocalModel:
         """
         Retrieves a local model for the specified reference.
@@ -187,8 +188,12 @@
         """
         runtime_type = runtime_type or FOCOOS_CONFIG.runtime_type
         model_dir = os.path.join(self.cache_dir, model_ref)
-        if not os.path.exists(os.path.join(model_dir, "model.onnx")):
-            self._download_model(model_ref)
+        format = ModelFormat.TORCHSCRIPT if runtime_type == RuntimeTypes.TORCHSCRIPT_32 else ModelFormat.ONNX
+        if not os.path.exists(os.path.join(model_dir, f"model.{format.value}")):
+            self._download_model(
+                model_ref,
+                format=format,
+            )
         return LocalModel(model_dir, runtime_type)
 
     def get_remote_model(self, model_ref: str) -> RemoteModel:
@@ -249,7 +254,7 @@ def list_shared_datasets(self) -> list[DatasetMetadata]:
             raise ValueError(f"Failed to list datasets: {res.status_code} {res.text}")
         return [DatasetMetadata.from_json(dataset) for dataset in res.json()]
 
-    def _download_model(self, model_ref: str) -> str:
+    def _download_model(self, model_ref: str, format: ModelFormat = ModelFormat.ONNX) -> str:
         """
         Downloads a model from the Focoos API.
 
@@ -263,14 +268,14 @@
             ValueError: If the API request fails or the download fails.
""" model_dir = os.path.join(self.cache_dir, model_ref) - model_path = os.path.join(model_dir, "model.onnx") + model_path = os.path.join(model_dir, f"model.{format.value}") metadata_path = os.path.join(model_dir, "focoos_metadata.json") if os.path.exists(model_path) and os.path.exists(metadata_path): logger.info("📥 Model already downloaded") return model_path ## download model metadata - res = self.http_client.get(f"models/{model_ref}/download?format=onnx") + res = self.http_client.get(f"models/{model_ref}/download?format={format.value}") if res.status_code != 200: logger.error(f"Failed to download model: {res.status_code} {res.text}") raise ValueError(f"Failed to download model: {res.status_code} {res.text}") diff --git a/focoos/local_model.py b/focoos/local_model.py index 0a37fbe..9770843 100644 --- a/focoos/local_model.py +++ b/focoos/local_model.py @@ -32,10 +32,11 @@ FocoosDetections, FocoosTask, LatencyMetrics, + ModelFormat, ModelMetadata, RuntimeTypes, ) -from focoos.runtime import ONNXRuntime, get_runtime +from focoos.runtime import BaseRuntime, load_runtime from focoos.utils.logger import get_logger from focoos.utils.vision import ( image_preprocess, @@ -82,20 +83,32 @@ def __init__( and initializes the runtime for inference using the provided runtime type. Annotation utilities are also prepared for visualizing model outputs. """ + # Determine runtime type and model format runtime_type = runtime_type or FOCOOS_CONFIG.runtime_type + model_format = ModelFormat.TORCHSCRIPT if runtime_type == RuntimeTypes.TORCHSCRIPT_32 else ModelFormat.ONNX - logger.debug(f"Runtime type: {runtime_type}, Loading model from {model_dir},") - if not os.path.exists(model_dir): - raise FileNotFoundError(f"Model directory not found: {model_dir}") + # Set model directory and path self.model_dir: Union[str, Path] = model_dir + self.model_path = os.path.join(model_dir, f"model.{model_format.value}") + logger.debug(f"Runtime type: {runtime_type}, Loading model from {self.model_path}..") + + # Check if model path exists + if not os.path.exists(self.model_path): + raise FileNotFoundError(f"Model path not found: {self.model_path}") + + # Load metadata and set model reference self.metadata: ModelMetadata = self._read_metadata() self.model_ref = self.metadata.ref + + # Initialize annotation utilities self.label_annotator = sv.LabelAnnotator(text_padding=10, border_radius=10) self.box_annotator = sv.BoxAnnotator() self.mask_annotator = sv.MaskAnnotator() - self.runtime: ONNXRuntime = get_runtime( + + # Load runtime for inference + self.runtime: BaseRuntime = load_runtime( runtime_type, - str(os.path.join(model_dir, "model.onnx")), + str(self.model_path), self.metadata, FOCOOS_CONFIG.warmup_iter, ) diff --git a/focoos/ports.py b/focoos/ports.py index dcd54e4..ceb08b7 100644 --- a/focoos/ports.py +++ b/focoos/ports.py @@ -211,7 +211,7 @@ class FocoosDetections(FocoosBaseModel): @dataclass -class OnnxEngineOpts: +class OnnxRuntimeOpts: fp16: Optional[bool] = False cuda: Optional[bool] = False vino: Optional[bool] = False @@ -221,6 +221,13 @@ class OnnxEngineOpts: warmup_iter: int = 0 +@dataclass +class TorchscriptRuntimeOpts: + warmup_iter: int = 0 + optimize_for_inference: bool = True + set_fusion_strategy: bool = True + + @dataclass class LatencyMetrics: fps: int @@ -239,6 +246,12 @@ class RuntimeTypes(str, Enum): ONNX_TRT16 = "onnx_trt16" ONNX_CPU = "onnx_cpu" ONNX_COREML = "onnx_coreml" + TORCHSCRIPT_32 = "torchscript_32" + + +class ModelFormat(str, Enum): + ONNX = "onnx" + TORCHSCRIPT = "pt" class 
GPUInfo(FocoosBaseModel): @@ -266,6 +279,7 @@ class SystemInfo(FocoosBaseModel): gpu_cuda_version: Optional[str] = None gpus_info: Optional[list[GPUInfo]] = None packages_versions: Optional[dict[str, str]] = None + environment: Optional[dict[str, str]] = None def pretty_print(self): print("================ SYSTEM INFO ====================") @@ -286,6 +300,10 @@ def pretty_print(self): print(f"{key}:") for pkg_name, pkg_version in value.items(): print(f" - {pkg_name}: {pkg_version}") + elif isinstance(value, dict) and key == "environment": # Special formatting for environment + print(f"{key}:") + for env_key, env_value in value.items(): + print(f" - {env_key}: {env_value}") else: print(f"{key}: {value}") print("================================================") diff --git a/focoos/runtime.py b/focoos/runtime.py index 87e8182..bfdb678 100644 --- a/focoos/runtime.py +++ b/focoos/runtime.py @@ -15,26 +15,45 @@ ONNXRuntime: A class that interfaces with ONNX Runtime for model inference. """ +from abc import abstractmethod from pathlib import Path from time import perf_counter -from typing import List, Tuple +from typing import Any, List, Tuple import numpy as np -import onnxruntime as ort + +try: + import torch + + TORCH_AVAILABLE = True +except ImportError as e: + print(e) + TORCH_AVAILABLE = False + +try: + import onnxruntime as ort + + ORT_AVAILABLE = True +except ImportError: + ORT_AVAILABLE = False + import supervision as sv from focoos.ports import ( FocoosTask, LatencyMetrics, ModelMetadata, - OnnxEngineOpts, + OnnxRuntimeOpts, RuntimeTypes, + TorchscriptRuntimeOpts, ) from focoos.utils.logger import get_logger from focoos.utils.system import get_cpu_name, get_gpu_name GPU_ID = 0 +logger = get_logger() + def det_postprocess(out: List[np.ndarray], im0_shape: Tuple[int, int], conf_threshold: float) -> sv.Detections: """ @@ -68,7 +87,6 @@ def semseg_postprocess(out: List[np.ndarray], im0_shape: Tuple[int, int], conf_t Args: out (List[np.ndarray]): The output of the semantic segmentation model. - im0_shape (Tuple[int, int]): The original shape of the input image (height, width). conf_threshold (float): The confidence threshold for filtering detections. Returns: @@ -89,239 +107,211 @@ def semseg_postprocess(out: List[np.ndarray], im0_shape: Tuple[int, int], conf_t ) -class ONNXRuntime: +class BaseRuntime: + def __init__(self, model_path: str, opts: Any, model_metadata: ModelMetadata): + pass + + @abstractmethod + def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: + pass + + @abstractmethod + def benchmark(self, iterations=20, size=640) -> LatencyMetrics: + pass + + +class ONNXRuntime(BaseRuntime): """ - A class that interfaces with ONNX Runtime for model inference using different execution providers - (CUDA, TensorRT, OpenVINO, CoreML, etc.). It manages preprocessing, inference, and postprocessing - of data, as well as benchmarking the performance of the model. - - Attributes: - logger (Logger): Logger for the ONNXRuntime instance. - name (str): The name of the model (derived from its path). - opts (OnnxEngineOpts): Options used for configuring the ONNX Runtime. - model_metadata (ModelMetadata): Metadata related to the model. - postprocess_fn (Callable): The function used to postprocess the model's output. - ort_sess (InferenceSession): The ONNXRuntime inference session. - dtype (np.dtype): The data type for the model input. - binding (Optional[str]): The binding type for the runtime (e.g., CUDA, CPU). 
+ ONNX Runtime wrapper for model inference with different execution providers. + Handles preprocessing, inference, postprocessing and benchmarking. """ - def __init__(self, model_path: str, opts: OnnxEngineOpts, model_metadata: ModelMetadata): - """ - Initializes the ONNXRuntime instance with the specified model and configuration options. - - Args: - model_path (str): Path to the ONNX model file. - opts (OnnxEngineOpts): The configuration options for ONNX Runtime. - model_metadata (ModelMetadata): Metadata for the model (e.g., task type). - """ + def __init__(self, model_path: str, opts: OnnxRuntimeOpts, model_metadata: ModelMetadata): self.logger = get_logger() - self.logger.debug(f"[onnxruntime device] {ort.get_device()}") - self.logger.debug(f"[onnxruntime available providers] {ort.get_available_providers()}") + + self.logger.debug(f"🔧 [onnxruntime device] {ort.get_device()}") + self.logger.debug(f"🔧 [onnxruntime available providers] {ort.get_available_providers()}") + self.name = Path(model_path).stem self.opts = opts self.model_metadata = model_metadata self.postprocess_fn = det_postprocess if model_metadata.task == FocoosTask.DETECTION else semseg_postprocess + + # Setup session options options = ort.SessionOptions() - if opts.verbose: - options.log_severity_level = 0 + options.log_severity_level = 0 if opts.verbose else 2 options.enable_profiling = opts.verbose - # options.intra_op_num_threads = 1 - available_providers = ort.get_available_providers() - if opts.cuda and "CUDAExecutionProvider" not in available_providers: - self.logger.warning("CUDA ExecutionProvider not found.") - if opts.trt and "TensorrtExecutionProvider" not in available_providers: - self.logger.warning("Tensorrt ExecutionProvider not found.") - if opts.vino and "OpenVINOExecutionProvider" not in available_providers: - self.logger.warning("OpenVINO ExecutionProvider not found.") - if opts.coreml and "CoreMLExecutionProvider" not in available_providers: - self.logger.warning("CoreML ExecutionProvider not found.") - # Set providers - providers = [] - dtype = np.float32 - binding = None - if opts.trt and "TensorrtExecutionProvider" in available_providers: - providers.append( - ( - "TensorrtExecutionProvider", - { - "device_id": 0, - # 'trt_max_workspace_size': 1073741824, # 1 GB - "trt_fp16_enable": opts.fp16, - "trt_force_sequential_engine_build": False, - }, - ) - ) - dtype = np.float32 - elif opts.vino and "OpenVINOExecutionProvider" in available_providers: - providers.append( - ( - "OpenVINOExecutionProvider", - { - "device_type": "MYRIAD_FP16", - "enable_vpu_fast_compile": True, - "num_of_threads": 1, - }, - # 'use_compiled_network': False} - ) - ) - options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL - dtype = np.float32 - binding = None - elif opts.cuda and "CUDAExecutionProvider" in available_providers: - binding = "cuda" - options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL - providers.append( - ( - "CUDAExecutionProvider", - { - "device_id": GPU_ID, - "arena_extend_strategy": "kSameAsRequested", - "gpu_mem_limit": 16 * 1024 * 1024 * 1024, - "cudnn_conv_algo_search": "EXHAUSTIVE", - "do_copy_in_default_stream": True, - }, - ) - ) - elif opts.coreml and "CoreMLExecutionProvider" in available_providers: - # # options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL - providers.append("CoreMLExecutionProvider") - else: - binding = None - - binding = None # TODO: remove this - providers.append("CPUExecutionProvider") - self.dtype = dtype - 
self.binding = binding + + # Setup providers + providers = self._setup_providers() + + # Create session self.ort_sess = ort.InferenceSession(model_path, options, providers=providers) self.active_providers = self.ort_sess.get_providers() - self.logger.info(f"[onnxruntime] Active providers:{self.ort_sess.get_providers()}") - if self.ort_sess.get_inputs()[0].type == "tensor(uint8)": - self.dtype = np.uint8 - else: - self.dtype = np.float32 + self.logger.info(f"[onnxruntime] Active providers:{self.active_providers}") + + # Set input type + self.dtype = np.uint8 if self.ort_sess.get_inputs()[0].type == "tensor(uint8)" else np.float32 + + # Warmup if self.opts.warmup_iter > 0: - self.logger.info("⏱️ [onnxruntime] Warming up model ..") - for _ in range(self.opts.warmup_iter): - np_image = np.random.rand(1, 3, 640, 640).astype(self.dtype) - input_name = self.ort_sess.get_inputs()[0].name - out_name = [output.name for output in self.ort_sess.get_outputs()] - if self.binding is not None: - io_binding = self.ort_sess.io_binding() - io_binding.bind_input( - input_name, - self.binding, - device_id=GPU_ID, - element_type=self.dtype, - shape=np_image.shape, - buffer_ptr=np_image.ctypes.data, - ) - io_binding.bind_cpu_input(input_name, np_image) - io_binding.bind_output(out_name[0], self.binding) - self.ort_sess.run_with_iobinding(io_binding) - io_binding.copy_outputs_to_cpu() - else: - self.ort_sess.run(out_name, {input_name: np_image}) - - self.logger.info(f"⏱️ [onnxruntime] {self.name} WARMUP DONE") + self._warmup() - def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: - """ - Runs inference on the provided input image and returns the model's detections. + def _setup_providers(self): + providers = [] + available = ort.get_available_providers() + + # Check and add providers in order of preference + provider_configs = [ + ( + "TensorrtExecutionProvider", + self.opts.trt, + {"device_id": 0, "trt_fp16_enable": self.opts.fp16, "trt_force_sequential_engine_build": False}, + ), + ( + "OpenVINOExecutionProvider", + self.opts.vino, + {"device_type": "MYRIAD_FP16", "enable_vpu_fast_compile": True, "num_of_threads": 1}, + ), + ( + "CUDAExecutionProvider", + self.opts.cuda, + { + "device_id": GPU_ID, + "arena_extend_strategy": "kSameAsRequested", + "gpu_mem_limit": 16 * 1024 * 1024 * 1024, + "cudnn_conv_algo_search": "EXHAUSTIVE", + "do_copy_in_default_stream": True, + }, + ), + ("CoreMLExecutionProvider", self.opts.coreml, {}), + ] + + for provider, enabled, config in provider_configs: + if enabled and provider in available: + providers.append((provider, config)) + elif enabled: + self.logger.warning(f"{provider} not found.") - Args: - im (np.ndarray): The preprocessed input image. - conf_threshold (float): The confidence threshold for filtering results. + providers.append("CPUExecutionProvider") + return providers - Returns: - sv.Detections: A sv.Detections object containing the model's output detections. 
- """ - out_name = None + def _warmup(self): + self.logger.info("⏱️ [onnxruntime] Warming up model ..") + np_image = np.random.rand(1, 3, 640, 640).astype(self.dtype) input_name = self.ort_sess.get_inputs()[0].name out_name = [output.name for output in self.ort_sess.get_outputs()] - if self.binding is not None: - self.logger.info(f"binding {self.binding}") - io_binding = self.ort_sess.io_binding() - - io_binding.bind_input( - input_name, - self.binding, - device_id=GPU_ID, - element_type=self.dtype, - shape=im.shape, - buffer_ptr=im.ctypes.data, - ) - - io_binding.bind_cpu_input(input_name, im) - io_binding.bind_output(out_name[0], self.binding) - self.ort_sess.run_with_iobinding(io_binding) - out = io_binding.copy_outputs_to_cpu() - else: - out = self.ort_sess.run(out_name, {input_name: im}) - - detections = self.postprocess_fn(out, (im.shape[2], im.shape[3]), conf_threshold) - return detections - def benchmark(self, iterations=20, size=640) -> LatencyMetrics: - """ - Benchmarks the model by running multiple inference iterations and measuring the latency. + for _ in range(self.opts.warmup_iter): + self.ort_sess.run(out_name, {input_name: np_image}) + + self.logger.info("⏱️ [onnxruntime] Warmup done") - Args: - iterations (int, optional): Number of iterations to run for benchmarking. Defaults to 20. - size (int, optional): The input image size for benchmarking. Defaults to 640. + def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: + """Run inference and return detections.""" + input_name = self.ort_sess.get_inputs()[0].name + out_name = [output.name for output in self.ort_sess.get_outputs()] + out = self.ort_sess.run(out_name, {input_name: im}) + return self.postprocess_fn(out=out, im0_shape=(im.shape[2], im.shape[3]), conf_threshold=conf_threshold) - Returns: - LatencyMetrics: The latency metrics (e.g., FPS, mean, min, max, and standard deviation). 
- """ + def benchmark(self, iterations=20, size=640) -> LatencyMetrics: + """Benchmark model latency.""" self.logger.info("⏱️ [onnxruntime] Benchmarking latency..") size = size if isinstance(size, (tuple, list)) else (size, size) - durations = [] np_input = (255 * np.random.random((1, 3, size[0], size[1]))).astype(self.dtype) input_name = self.ort_sess.get_inputs()[0].name - out_name = self.ort_sess.get_outputs()[0].name - if self.binding: - io_binding = self.ort_sess.io_binding() - - io_binding.bind_input( - input_name, - "cuda", - device_id=0, - element_type=self.dtype, - shape=np_input.shape, - buffer_ptr=np_input.ctypes.data, - ) - - io_binding.bind_cpu_input(input_name, np_input) - io_binding.bind_output(out_name, "cuda") - else: - out_name = [output.name for output in self.ort_sess.get_outputs()] + out_name = [output.name for output in self.ort_sess.get_outputs()] + durations = [] for step in range(iterations + 5): - if self.binding: - start = perf_counter() - self.ort_sess.run_with_iobinding(io_binding) - end = perf_counter() - else: - start = perf_counter() - self.ort_sess.run(out_name, {input_name: np_input}) - end = perf_counter() + start = perf_counter() + self.ort_sess.run(out_name, {input_name: np_input}) + end = perf_counter() - if step >= 5: + if step >= 5: # Skip first 5 iterations durations.append((end - start) * 1000) + durations = np.array(durations) provider = self.active_providers[0] - if provider in ["CUDAExecutionProvider", "TensorrtExecutionProvider"]: - device = get_gpu_name() - else: - device = get_cpu_name() + device = ( + get_gpu_name() if provider in ["CUDAExecutionProvider", "TensorrtExecutionProvider"] else get_cpu_name() + ) + metrics = LatencyMetrics( fps=int(1000 / durations.mean()), engine=f"onnx.{provider}", - mean=round(durations.mean(), 3), - max=round(durations.max(), 3), - min=round(durations.min(), 3), - std=round(durations.std(), 3), + mean=round(durations.mean().astype(float), 3), + max=round(durations.max().astype(float), 3), + min=round(durations.min().astype(float), 3), + std=round(durations.std().astype(float), 3), + im_size=size[0], + device=str(device), + ) + self.logger.info(f"🔥 FPS: {metrics.fps}") + return metrics + + +class TorchscriptRuntime(BaseRuntime): + def __init__( + self, + model_path: str, + opts: TorchscriptRuntimeOpts, + model_metadata: ModelMetadata, + ): + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.logger = get_logger(name="TorchscriptEngine") + self.logger.info(f"🔧 [torchscript] Device: {self.device}") + self.opts = opts + self.postprocess_fn = det_postprocess if model_metadata.task == FocoosTask.DETECTION else semseg_postprocess + + map_location = None if torch.cuda.is_available() else "cpu" + + self.model = torch.jit.load(model_path, map_location=map_location) + self.model = self.model.to(self.device) + + if self.opts.warmup_iter > 0: + self.logger.info("⏱️ [torchscript] Warming up model..") + with torch.no_grad(): + np_image = torch.rand(1, 3, 640, 640, device=self.device) + for _ in range(self.opts.warmup_iter): + self.model(np_image) + self.logger.info("⏱️ [torchscript] WARMUP DONE") + + def __call__(self, im: np.ndarray, conf_threshold: float) -> sv.Detections: + """Run inference and return detections.""" + with torch.no_grad(): + torch_image = torch.from_numpy(im).to(self.device, dtype=torch.float32) + res = self.model(torch_image) + return self.postprocess_fn([r.cpu().numpy() for r in res], (im.shape[2], im.shape[3]), conf_threshold) + + def benchmark(self, iterations=20, 
size=640) -> LatencyMetrics: + """Benchmark model latency.""" + self.logger.info("⏱️ [torchscript] Benchmarking latency..") + size = size if isinstance(size, (tuple, list)) else (size, size) + + torch_input = torch.rand(1, 3, size[0], size[1], device=self.device) + durations = [] + + with torch.no_grad(): + for step in range(iterations + 5): + start = perf_counter() + self.model(torch_input) + end = perf_counter() + + if step >= 5: # Skip first 5 iterations + durations.append((end - start) * 1000) + + durations = np.array(durations) + device = get_gpu_name() if torch.cuda.is_available() else get_cpu_name() + + metrics = LatencyMetrics( + fps=int(1000 / durations.mean().astype(float)), + engine="torchscript", + mean=round(durations.mean().astype(float), 3), + max=round(durations.max().astype(float), 3), + min=round(durations.min().astype(float), 3), + std=round(durations.std().astype(float), 3), im_size=size[0], device=str(device), ) @@ -329,31 +319,50 @@ def benchmark(self, iterations=20, size=640) -> LatencyMetrics: return metrics -def get_runtime( +def load_runtime( runtime_type: RuntimeTypes, model_path: str, model_metadata: ModelMetadata, warmup_iter: int = 0, -) -> ONNXRuntime: +) -> BaseRuntime: """ - Creates and returns an ONNXRuntime instance based on the specified runtime type - and model path, with options for various execution providers (CUDA, TensorRT, CPU, etc.). + Creates and returns a runtime instance based on the specified runtime type. + Supports both ONNX and TorchScript runtimes with various execution providers. Args: - runtime_type (RuntimeTypes): The type of runtime to use (e.g., ONNX_CUDA32, ONNX_TRT32). - model_path (str): The path to the ONNX model. - model_metadata (ModelMetadata): Metadata describing the model. - warmup_iter (int, optional): Number of warmup iterations before benchmarking. Defaults to 0. + runtime_type (RuntimeTypes): The type of runtime to use. Can be one of: + - ONNX_CUDA32: ONNX runtime with CUDA FP32 + - ONNX_TRT32: ONNX runtime with TensorRT FP32 + - ONNX_TRT16: ONNX runtime with TensorRT FP16 + - ONNX_CPU: ONNX runtime with CPU + - ONNX_COREML: ONNX runtime with CoreML + - TORCHSCRIPT_32: TorchScript runtime with FP32 + model_path (str): Path to the model file (.onnx or .pt) + model_metadata (ModelMetadata): Model metadata containing task type, classes etc. + warmup_iter (int, optional): Number of warmup iterations before inference. Defaults to 0. Returns: - ONNXRuntime: A fully configured ONNXRuntime instance. 
+ BaseRuntime: A configured runtime instance (ONNXRuntime or TorchscriptRuntime) + + Raises: + ImportError: If required dependencies (torch/onnxruntime) are not installed """ - opts = OnnxEngineOpts( - cuda=runtime_type == RuntimeTypes.ONNX_CUDA32, - trt=runtime_type in [RuntimeTypes.ONNX_TRT32, RuntimeTypes.ONNX_TRT16], - fp16=runtime_type == RuntimeTypes.ONNX_TRT16, - warmup_iter=warmup_iter, - coreml=runtime_type == RuntimeTypes.ONNX_COREML, - verbose=False, - ) + if runtime_type == RuntimeTypes.TORCHSCRIPT_32: + if not TORCH_AVAILABLE: + logger.error("⚠️ Pytorch not found =( please install focoos with ['torch'] extra") + raise ImportError("Pytorch not found") + opts = TorchscriptRuntimeOpts(warmup_iter=warmup_iter) + return TorchscriptRuntime(model_path, opts, model_metadata) + else: + if not ORT_AVAILABLE: + logger.error("⚠️ onnxruntime not found =( please install focoos with ['onnx'] extra") + raise ImportError("onnxruntime not found") + opts = OnnxRuntimeOpts( + cuda=runtime_type == RuntimeTypes.ONNX_CUDA32, + trt=runtime_type in [RuntimeTypes.ONNX_TRT32, RuntimeTypes.ONNX_TRT16], + fp16=runtime_type == RuntimeTypes.ONNX_TRT16, + warmup_iter=warmup_iter, + coreml=runtime_type == RuntimeTypes.ONNX_COREML, + verbose=False, + ) return ONNXRuntime(model_path, opts, model_metadata) diff --git a/focoos/utils/system.py b/focoos/utils/system.py index 6c35a64..0bce5f8 100644 --- a/focoos/utils/system.py +++ b/focoos/utils/system.py @@ -1,4 +1,5 @@ import importlib.metadata as metadata +import os import platform import subprocess from typing import Optional @@ -189,22 +190,23 @@ def get_cpu_name() -> Optional[str]: def get_system_info() -> SystemInfo: """ - Gather and return comprehensive system information. + Collect and return detailed system information. - This function collects various system metrics including CPU, memory, disk, - and GPU details, as well as installed package versions. It returns this - information encapsulated in a SystemInfo object. + This function gathers a wide range of system metrics, including CPU, memory, + disk, and GPU details, as well as versions of installed packages. The collected + information is encapsulated in a SystemInfo object. 
Returns: - SystemInfo: An object containing detailed information about the system's - hardware and software configuration, including: + SystemInfo: An object containing comprehensive details about the system's + hardware and software configuration, such as: - System and node name - - CPU type and core count + - CPU type and number of cores - Available ONNXRuntime providers - Memory and disk usage statistics - - GPU count, driver, and CUDA version - - Detailed GPU information if available + - Number of GPUs, driver, and CUDA version + - Detailed information for each GPU, if available - Versions of key installed packages + - Environment variables related to the system """ system_info = platform.uname() memory_info = psutil.virtual_memory() @@ -239,6 +241,10 @@ def get_system_info() -> SystemInfo: "pillow", "supervision", "pydantic", + "torch", + "torchvision", + "nvidia-cuda-runtime-cu12", + "tensorrt", ] versions = {} for package in packages: @@ -247,6 +253,18 @@ def get_system_info() -> SystemInfo: except metadata.PackageNotFoundError: versions[package] = "unknown" + environments_var = [ + "LD_LIBRARY_PATH", + "LD_PRELOAD", + "CUDA_HOME", + "CUDA_VISIBLE_DEVICES", + "FOCOOS_LOG_LEVEL", + "DEFAULT_HOST_URL", + ] + environments = {} + for var in environments_var: + environments[var] = os.getenv(var, "") + return SystemInfo( focoos_host=FOCOOS_CONFIG.default_host_url, system=system_info.system, @@ -263,4 +281,5 @@ def get_system_info() -> SystemInfo: gpu_cuda_version=get_cuda_version(), gpus_info=gpus_info, packages_versions=versions, + environment=environments, ) diff --git a/notebooks/playground.ipynb b/notebooks/playground.ipynb index b353968..d47719e 100644 --- a/notebooks/playground.ipynb +++ b/notebooks/playground.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Setup" + "# 🐍 Setup Focoos" ] }, { @@ -13,14 +13,21 @@ "metadata": {}, "outputs": [], "source": [ - "%uv pip install -e ..[dev,gpu]" + "%uv pip install -e ..[onnx,torch,tensorrt] # you can choose to install only the inference execution providers you need" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# 🤖 Focoos Foundational Models\n" + "# 🤖 Playground with Focoos pre-trained models\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Supported Tasks: Object Detection, Instance Segmentation, Semantic Segmentation" ] }, { @@ -32,9 +39,9 @@ "import os\n", "from pprint import pprint\n", "\n", - "from focoos import DEV_API_URL, Focoos\n", + "from focoos import Focoos\n", "\n", - "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"))\n", "\n", "pprint(focoos.list_focoos_models())" ] @@ -43,7 +50,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Cloud Inference" + "## Remote Inference\n", + "This section demonstrates how to perform remote inference using a model from the Focoos platform.\n", + "We will load a remote model (can be a pre-trained model or a custom user model), and then run inference on a sample image with focoos API.\n" ] }, { @@ -57,17 +66,17 @@ "\n", "from supervision import plot_image\n", "\n", - "from focoos import DEV_API_URL, Focoos\n", + "from focoos import Focoos\n", "\n", "model_ref = \"focoos_object365\"\n", "image_path = \"./assets/ade_val_034.jpg\"\n", "\n", - "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"))\n", "\n", "model = 
focoos.get_remote_model(model_ref)\n", "## Only admin can deploy foundational models\n", "\n", - "output, preview = model.infer(image_path, threshold=0.4, annotate=True)\n", + "output, preview = model.infer(image_path, threshold=0.6, annotate=True)\n", "plot_image(preview)" ] }, @@ -85,7 +94,33 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Available Runtime Types" + "### 🔧 Available Runtimes and Execution Providers\n", + "\n", + "#### To install the extras modules, use the command: `uv pip install .[{{extra-name}}]`.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- **TORCHSCRIPT** *`[torch]`*\n", + " - **CUDA FP32**\n", + " NVIDIA GPU acceleration with FP32 precision, excellent balance between speed and accuracy, suggested for segmentation tasks\n", + "- **ONNXRUNTIME** providers:\n", + " - **CPU** *`[cpu]`*\n", + "\n", + " Standard CPU execution, compatible with all systems but with limited performance\n", + " - **CUDA FP32** *`[gpu]`*\n", + "\n", + " NVIDIA GPU acceleration with FP32 precision, excellent balance between speed and accuracy\n", + " - **TensorRT FP16** *`[tensorrt]`*\n", + "\n", + " Maximum optimization for NVIDIA GPUs with reduced FP16 precision, exceptional performance, but slow model warmup due to tensorrt engine compilation\n", + " - **CoreML** \n", + " *`[cpu] extras`*\n", + " Optimized for Apple Silicon devices, leverages Neural Engine for hardware acceleration\n", + "\n", + "\n" ] }, { @@ -104,7 +139,27 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### OnnxRuntime With CUDA (focoos_object365)" + "### 🖥️ System info\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from focoos.utils.system import get_system_info\n", + "\n", + "system_info = get_system_info()\n", + "system_info.pretty_print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inference with TorchscriptRuntime (CUDA32)" ] }, { @@ -118,19 +173,19 @@ "\n", "from supervision import plot_image\n", "\n", - "from focoos import DEV_API_URL, Focoos\n", + "from focoos import Focoos, RuntimeTypes\n", "\n", - "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"))\n", "image_path = \"./assets/ade_val_034.jpg\"\n", "model_ref = \"focoos_object365\"\n", "\n", "\n", - "model = focoos.get_local_model(model_ref)\n", + "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.TORCHSCRIPT_32)\n", "\n", "latency = model.benchmark(iterations=10, size=640)\n", "pprint(latency)\n", "# pprint(latency)\n", - "output, preview = model.infer(image_path, threshold=0.3, annotate=True)\n", + "output, preview = model.infer(image_path, threshold=0.6, annotate=True)\n", "pprint(output.detections)\n", "pprint(output.latency)\n", "\n", @@ -141,7 +196,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### OnnxRuntime With TensorRT (FP16) (focoos_object365)" + "### Inference with OnnxRuntime (CUDA32) (focoos_object365)" ] }, { @@ -155,22 +210,19 @@ "\n", "from supervision import plot_image\n", "\n", - "from focoos import Focoos\n", - "from focoos.ports import RuntimeTypes\n", + "from focoos import DEV_API_URL, Focoos, RuntimeTypes\n", "\n", - "focoos = Focoos(\n", - " api_key=os.getenv(\"FOCOOS_API_KEY\"),\n", - ")\n", + "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "image_path = \"./assets/ade_val_034.jpg\"\n", "model_ref = 
\"focoos_object365\"\n", "\n", "\n", - "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.ONNX_TRT16)\n", + "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.ONNX_CUDA32)\n", "\n", "latency = model.benchmark(iterations=10, size=640)\n", "pprint(latency)\n", "# pprint(latency)\n", - "output, preview = model.infer(image_path, threshold=0.3, annotate=True)\n", + "output, preview = model.infer(image_path, threshold=0.6, annotate=True)\n", "pprint(output.detections)\n", "pprint(output.latency)\n", "\n", @@ -181,14 +233,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# User Models" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List User Models" + "### Inference with OnnxRuntime (TensorRT) (FP16)" ] }, { @@ -200,23 +245,42 @@ "import os\n", "from pprint import pprint\n", "\n", - "from dotenv import load_dotenv\n", + "from supervision import plot_image\n", "\n", - "from focoos import DEV_API_URL, Focoos\n", + "from focoos import Focoos\n", + "from focoos.ports import RuntimeTypes\n", "\n", - "load_dotenv()\n", + "focoos = Focoos(\n", + " api_key=os.getenv(\"FOCOOS_API_KEY\"),\n", + ")\n", + "image_path = \"./assets/ade_val_034.jpg\"\n", + "model_ref = \"focoos_object365\"\n", "\n", - "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "models = focoos.list_models()\n", - "pprint(models)" + "model = focoos.get_local_model(model_ref, runtime_type=RuntimeTypes.ONNX_TRT16)\n", + "\n", + "latency = model.benchmark(iterations=10, size=640)\n", + "pprint(latency)\n", + "# pprint(latency)\n", + "output, preview = model.infer(image_path, threshold=0.6, annotate=True)\n", + "pprint(output.detections)\n", + "pprint(output.latency)\n", + "\n", + "plot_image(preview)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Create Model" + "# User Models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### List User Models" ] }, { @@ -236,15 +300,15 @@ "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "model = focoos.new_model(name=\"test-model\", focoos_model=\"focoos_object365\", description=\"Test model\")\n", - "### Get Model Info" + "models = focoos.list_models()\n", + "pprint(models)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Deploy user model on shared cloud endpoint" + "## Create Model" ] }, { @@ -264,8 +328,8 @@ "\n", "focoos = Focoos(api_key=os.getenv(\"FOCOOS_API_KEY\"), host_url=DEV_API_URL)\n", "\n", - "model = focoos.get_remote_model(os.getenv(\"FOCOOS_MODEL_REF\"))\n", - "model_info = model.get_info()" + "model = focoos.new_model(name=\"test-model\", focoos_model=\"focoos_object365\", description=\"Test model\")\n", + "### Get Model Info" ] }, { @@ -520,13 +584,6 @@ "metrics_visualizer.log_metrics()\n", "metrics_visualizer.notebook_plot_training_metrics()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/pyproject.toml b/pyproject.toml index b5bb6d2..19159f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ convention = "google" [tool.setuptools.packages.find] include = ["focoos**"] + [project] name = "focoos" version = "0.8.0" @@ -31,7 +32,7 @@ readme = "README.md" requires-python = ">=3.10" dependencies = [ "requests", - "Pillow~=10.4.0", + "Pillow~=10.2.0", "supervision~=0.25.1", "opencv-python~=4.11.0", "pydantic~=2.10.5", @@ -56,9 +57,10 @@ keywords = [ ] 
[project.optional-dependencies] -cpu = ["onnxruntime==1.20.1"] -gpu = ["onnxruntime-gpu==1.20.1","nvidia-cuda-runtime-cu12==12.4.127"] -tensorrt = ["tensorrt==10.5.0"] +default = ["onnxruntime==1.20.1"] +onnx = ["onnxruntime-gpu==1.20.1"] +tensorrt = ["onnxruntime-gpu==1.20.1","tensorrt==10.5.0"] +torch = ["torch==2.3.0","torchvision"] dev = [ "pytest", "pytest-cov", diff --git a/tests/test_local_model.py b/tests/test_local_model.py index 141856b..4a89a08 100644 --- a/tests/test_local_model.py +++ b/tests/test_local_model.py @@ -14,7 +14,7 @@ ModelMetadata, RuntimeTypes, ) -from focoos.runtime import ONNXRuntime +from focoos.runtime import ONNXRuntime, TorchscriptRuntime @pytest.fixture @@ -28,11 +28,12 @@ def mock_model_dir(tmp_path, mock_metadata: ModelMetadata): @pytest.fixture -def mock_local_model(mocker: MockerFixture, mock_model_dir, image_ndarray): +def mock_local_model_onnx(mocker: MockerFixture, mock_model_dir, image_ndarray): # Mock get_runtime mock_runtime = MagicMock(spec=ONNXRuntime) - mock_get_runtime = mocker.patch("focoos.local_model.get_runtime", mock_runtime) + mock_get_runtime = mocker.patch("focoos.local_model.load_runtime", mock_runtime) mock_get_runtime.return_value = mock_runtime + mocker.patch("focoos.local_model.os.path.exists", return_value=True) model = LocalModel(model_dir=mock_model_dir, runtime_type=RuntimeTypes.ONNX_CPU) # Mock BoxAnnotator @@ -54,26 +55,66 @@ def mock_local_model(mocker: MockerFixture, mock_model_dir, image_ndarray): return model +@pytest.fixture +def mock_local_model_torch(mocker: MockerFixture, mock_model_dir, image_ndarray): + # Mock get_runtime + mock_runtime = MagicMock(spec=TorchscriptRuntime) + mock_get_runtime = mocker.patch("focoos.local_model.load_runtime", mock_runtime) + mock_get_runtime.return_value = mock_runtime + mocker.patch("focoos.local_model.os.path.exists", return_value=True) + model = LocalModel(model_dir=mock_model_dir, runtime_type=RuntimeTypes.TORCHSCRIPT_32) + + # Mock BoxAnnotator + mock_box_annotator = mocker.patch("focoos.local_model.sv.BoxAnnotator", autospec=True) + mock_box_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) + + # Mock LabelAnnotator + mock_label_annotator = mocker.patch("focoos.local_model.sv.LabelAnnotator", autospec=True) + mock_label_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) + + # Mock MaskAnnotator + mock_mask_annotator = mocker.patch("focoos.local_model.sv.MaskAnnotator", autospec=True) + mock_mask_annotator.annotate = MagicMock(return_value=np.zeros_like(image_ndarray)) + + # Inject mock annotators into the local model + model.box_annotator = mock_box_annotator + model.label_annotator = mock_label_annotator + model.mask_annotator = mock_mask_annotator + return model + + def test_initialization_fail_no_model_dir(): with pytest.raises(FileNotFoundError): LocalModel(model_dir="fakedir", runtime_type=RuntimeTypes.ONNX_CPU) -def test_initialization(mock_local_model: LocalModel, mock_model_dir, mock_metadata): - assert mock_local_model.model_dir == mock_model_dir - assert mock_local_model.metadata == mock_metadata - assert isinstance(mock_local_model.runtime, ONNXRuntime) +def test_init_file_not_found(mocker: MockerFixture): + mocker.patch("focoos.local_model.os.path.exists", return_value=False) + with pytest.raises(FileNotFoundError): + LocalModel(model_dir="fakedir", runtime_type=RuntimeTypes.ONNX_CPU) + + +def test_initialization_onnx(mock_local_model_onnx: LocalModel, mock_model_dir, mock_metadata): + assert 
mock_local_model_onnx.model_dir == mock_model_dir + assert mock_local_model_onnx.metadata == mock_metadata + assert isinstance(mock_local_model_onnx.runtime, ONNXRuntime) + + +def test_initialization_torch(mock_local_model_torch: LocalModel, mock_model_dir, mock_metadata): + assert mock_local_model_torch.model_dir == mock_model_dir + assert mock_local_model_torch.metadata == mock_metadata + assert isinstance(mock_local_model_torch.runtime, TorchscriptRuntime) -def test_benchmark(mock_local_model: LocalModel): - mock_local_model.runtime.benchmark.return_value = MagicMock(spec=LatencyMetrics) +def test_benchmark(mock_local_model_onnx: LocalModel): + mock_local_model_onnx.runtime.benchmark.return_value = MagicMock(spec=LatencyMetrics) iterations, size = 10, 1000 - result = mock_local_model.benchmark(iterations, size) + result = mock_local_model_onnx.benchmark(iterations, size) assert result is not None assert isinstance(result, LatencyMetrics) - mock_local_model.runtime.benchmark.assert_called_once_with(iterations, size) + mock_local_model_onnx.runtime.benchmark.assert_called_once_with(iterations, size) @pytest.fixture @@ -99,34 +140,34 @@ def mock_sv_detections() -> sv.Detections: def test_annotate_detection_metadata_classes_none( - image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections + image_ndarray: np.ndarray, mock_local_model_onnx: LocalModel, mock_sv_detections ): - mock_local_model.metadata.classes = None - annotated_im = mock_local_model._annotate(image_ndarray, mock_sv_detections) + mock_local_model_onnx.metadata.classes = None + annotated_im = mock_local_model_onnx._annotate(image_ndarray, mock_sv_detections) assert annotated_im is not None assert isinstance(annotated_im, np.ndarray) - mock_local_model.box_annotator.annotate.assert_called_once() - mock_local_model.label_annotator.annotate.assert_called_once() - mock_local_model.mask_annotator.annotate.assert_not_called() + mock_local_model_onnx.box_annotator.annotate.assert_called_once() + mock_local_model_onnx.label_annotator.annotate.assert_called_once() + mock_local_model_onnx.mask_annotator.annotate.assert_not_called() -def test_annotate_detection(image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections): - annotated_im = mock_local_model._annotate(image_ndarray, mock_sv_detections) +def test_annotate_detection(image_ndarray: np.ndarray, mock_local_model_onnx: LocalModel, mock_sv_detections): + annotated_im = mock_local_model_onnx._annotate(image_ndarray, mock_sv_detections) assert annotated_im is not None assert isinstance(annotated_im, np.ndarray) - mock_local_model.box_annotator.annotate.assert_called_once() - mock_local_model.label_annotator.annotate.assert_called_once() - mock_local_model.mask_annotator.annotate.assert_not_called() + mock_local_model_onnx.box_annotator.annotate.assert_called_once() + mock_local_model_onnx.label_annotator.annotate.assert_called_once() + mock_local_model_onnx.mask_annotator.annotate.assert_not_called() -def test_annotate_semseg(image_ndarray: np.ndarray, mock_local_model: LocalModel, mock_sv_detections): - mock_local_model.metadata.task = FocoosTask.SEMSEG - annotated_im = mock_local_model._annotate(image_ndarray, mock_sv_detections) +def test_annotate_semseg(image_ndarray: np.ndarray, mock_local_model_onnx: LocalModel, mock_sv_detections): + mock_local_model_onnx.metadata.task = FocoosTask.SEMSEG + annotated_im = mock_local_model_onnx._annotate(image_ndarray, mock_sv_detections) assert annotated_im is not None assert isinstance(annotated_im, 
np.ndarray) - mock_local_model.box_annotator.annotate.asser_not_called() - mock_local_model.label_annotator.annotate.asser_not_called() - mock_local_model.mask_annotator.annotate.assert_called_once() + mock_local_model_onnx.box_annotator.annotate.asser_not_called() + mock_local_model_onnx.label_annotator.annotate.asser_not_called() + mock_local_model_onnx.mask_annotator.annotate.assert_called_once() def mock_infer_setup( @@ -177,7 +218,7 @@ def __call__(self, *args, **kwargs): @pytest.mark.parametrize("annotate", [(False, None)]) def test_infer_( mocker, - mock_local_model, + mock_local_model_onnx, image_ndarray, mock_sv_detections, mock_focoos_detections, @@ -186,7 +227,7 @@ def test_infer_( # Arrange *mock_to_call_once, mock_annotate = mock_infer_setup( mocker, - mock_local_model, + mock_local_model_onnx, image_ndarray, mock_sv_detections, mock_focoos_detections, @@ -194,7 +235,7 @@ def test_infer_( ) # Act - out, im = mock_local_model.infer(image=image_ndarray, annotate=annotate) + out, im = mock_local_model_onnx.infer(image=image_ndarray, annotate=annotate) # Assertions assert out is not None diff --git a/tests/test_runtime.py b/tests/test_runtime.py index bf8f1cf..6568015 100644 --- a/tests/test_runtime.py +++ b/tests/test_runtime.py @@ -5,8 +5,8 @@ import pytest from pytest_mock import MockerFixture -from focoos.ports import ModelMetadata, OnnxEngineOpts, RuntimeTypes -from focoos.runtime import ONNXRuntime, det_postprocess, get_runtime, semseg_postprocess +from focoos.ports import ModelMetadata, OnnxRuntimeOpts, RuntimeTypes, TorchscriptRuntimeOpts +from focoos.runtime import ONNXRuntime, TorchscriptRuntime, det_postprocess, load_runtime, semseg_postprocess def test_det_post_process(): @@ -78,7 +78,7 @@ def test_semseg_postprocess(): [ ( RuntimeTypes.ONNX_CUDA32, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=True, trt=False, fp16=False, @@ -89,7 +89,7 @@ def test_semseg_postprocess(): ), ( RuntimeTypes.ONNX_TRT32, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=False, trt=True, fp16=False, @@ -100,7 +100,7 @@ def test_semseg_postprocess(): ), ( RuntimeTypes.ONNX_TRT16, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=False, trt=True, fp16=True, @@ -111,7 +111,7 @@ def test_semseg_postprocess(): ), ( RuntimeTypes.ONNX_CPU, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=False, trt=False, fp16=False, @@ -122,7 +122,7 @@ def test_semseg_postprocess(): ), ( RuntimeTypes.ONNX_COREML, - OnnxEngineOpts( + OnnxRuntimeOpts( cuda=False, trt=False, fp16=False, @@ -131,6 +131,14 @@ def test_semseg_postprocess(): warmup_iter=2, ), ), + ( + RuntimeTypes.TORCHSCRIPT_32, + TorchscriptRuntimeOpts( + warmup_iter=2, + optimize_for_inference=True, + set_fusion_strategy=True, + ), + ), ], ) def test_get_run_time(mocker: MockerFixture, tmp_path, runtime_type, expected_opts): @@ -144,14 +152,18 @@ def test_get_run_time(mocker: MockerFixture, tmp_path, runtime_type, expected_op mock_model_metadata = MagicMock(spec=ModelMetadata) # mock opts - mock_onnxruntime_class = mocker.patch("focoos.runtime.ONNXRuntime", autospec=True) - mock_onnxruntime_class.return_value = MagicMock(spec=ONNXRuntime, opts=expected_opts) + if runtime_type == RuntimeTypes.TORCHSCRIPT_32: + mock_runtime_class = mocker.patch("focoos.runtime.TorchscriptRuntime", autospec=True) + mock_runtime_class.return_value = MagicMock(spec=TorchscriptRuntime, opts=expected_opts) + else: + mock_runtime_class = mocker.patch("focoos.runtime.ONNXRuntime", autospec=True) + mock_runtime_class.return_value = MagicMock(spec=ONNXRuntime, opts=expected_opts) # warmup_iter 
warmup_iter = 2 # call the function to test - onnx_runtime = get_runtime( + runtime = load_runtime( runtime_type=runtime_type, model_path=model_path, model_metadata=mock_model_metadata, @@ -159,8 +171,8 @@ def test_get_run_time(mocker: MockerFixture, tmp_path, runtime_type, expected_op ) # assertions - assert onnx_runtime is not None - mock_onnxruntime_class.assert_called_once_with( + assert runtime is not None + mock_runtime_class.assert_called_once_with( model_path, expected_opts, mock_model_metadata,