Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCR Block v2 #706

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docker/dockerfiles/Dockerfile.onnx.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ RUN pip3 install --upgrade pip && pip3 install \
-r requirements.transformers.txt \
jupyterlab \
wheel>=0.38.0 \
setuptools>=65.5.1 \
--upgrade \
&& rm -rf ~/.cache/pip

Expand Down Expand Up @@ -74,4 +75,5 @@ ENV API_LOGGING_ENABLED=True
ENV CORE_MODEL_SAM2_ENABLED=True
ENV CORE_MODEL_OWLV2_ENABLED=True

ENTRYPOINT uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
RUN pip install watchdog[watchmedo]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ENTRYPOINT watchmedo auto-restart --directory=/app/inference --pattern=*.py --recursive -- uvicorn cpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from abc import ABC, abstractmethod
from typing import Callable, List

from inference.core.workflows.core_steps.common.entities import (
StepExecutionMode,
)
from inference.core.workflows.execution_engine.entities.base import (
Batch,
WorkflowImageData,
)
from inference.core.workflows.prototypes.block import BlockResult


class BaseOCRModel(ABC):
    """Abstract interface shared by OCR model wrappers used by the OCR workflow block.

    Subclasses implement ``run``, which executes OCR over a batch of images
    (locally or remotely, depending on ``step_execution_mode``) and shapes
    the raw predictions through ``post_process_result``.
    """

    def __init__(self, model_manager, api_key):
        # model_manager loads and serves core models; api_key authenticates
        # requests made on behalf of the caller.
        self.model_manager = model_manager
        self.api_key = api_key

    @abstractmethod
    def run(
        self,
        images: Batch[WorkflowImageData],
        step_execution_mode: StepExecutionMode,
        post_process_result: Callable[
            [Batch[WorkflowImageData], List[dict]], BlockResult
        ],
    ) -> BlockResult:
        """Run OCR on `images` and return the post-processed block result."""
        pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from inference.core.entities.requests.doctr import DoctrOCRInferenceRequest
from inference.core.workflows.core_steps.common.entities import (
StepExecutionMode,
)
from inference.core.workflows.core_steps.common.utils import load_core_model
from inference.core.workflows.execution_engine.entities.base import (
Batch,
WorkflowImageData,
)
from inference.core.workflows.prototypes.block import BlockResult
from typing import Callable, List

from .base import BaseOCRModel


class DoctrOCRModel(BaseOCRModel):
    """OCR wrapper that dispatches doctr core-model inference locally or remotely."""

    def run(
        self,
        images: Batch[WorkflowImageData],
        step_execution_mode: StepExecutionMode,
        post_process_result: Callable[
            [Batch[WorkflowImageData], List[dict]], BlockResult
        ],
    ) -> BlockResult:
        """Execute OCR in the requested mode.

        Raises:
            ValueError: if `step_execution_mode` is not a recognized mode.
                (Previously an unknown mode silently returned ``None``.)
        """
        if step_execution_mode is StepExecutionMode.LOCAL:
            return self.run_locally(images, post_process_result)
        elif step_execution_mode is StepExecutionMode.REMOTE:
            return self.run_remotely(images, post_process_result)
        raise ValueError(
            f"Unknown step execution mode: {step_execution_mode}"
        )

    def run_locally(
        self,
        images: Batch[WorkflowImageData],
        post_process_result: Callable[
            [Batch[WorkflowImageData], List[dict]], BlockResult
        ],
    ) -> BlockResult:
        """Run the doctr core model in-process for each image in the batch."""
        predictions = []
        for single_image in images:
            inference_request = DoctrOCRInferenceRequest(
                # numpy_preferred avoids an unnecessary encode/decode round-trip
                image=single_image.to_inference_format(numpy_preferred=True),
                api_key=self.api_key,
            )
            doctr_model_id = load_core_model(
                model_manager=self.model_manager,
                inference_request=inference_request,
                core_model="doctr",
            )
            result = self.model_manager.infer_from_request_sync(
                doctr_model_id, inference_request
            )
            predictions.append(result.model_dump())
        return post_process_result(images, predictions)

    def run_remotely(
        self,
        images: Batch[WorkflowImageData],
        post_process_result: Callable[
            [Batch[WorkflowImageData], List[dict]], BlockResult
        ],
    ) -> BlockResult:
        """Remote execution is not supported for doctr."""
        raise NotImplementedError(
            "Remote execution is not implemented for DoctrOCRModel."
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# models/google_cloud_vision.py

from .base import BaseOCRModel
from inference.core.workflows.core_steps.common.entities import (
StepExecutionMode,
)
from inference.core.workflows.execution_engine.entities.base import (
Batch,
WorkflowImageData,
)
from typing import Optional
import requests


class GoogleCloudVisionOCRModel(BaseOCRModel):
    """OCR wrapper calling the Google Cloud Vision `images:annotate` REST API."""

    def __init__(
        self, model_manager, api_key: Optional[str], google_cloud_api_key: str
    ):
        super().__init__(model_manager, api_key)
        # Separate credential: the Google Cloud API key, distinct from the
        # platform api_key held by the base class.
        self.google_cloud_api_key = google_cloud_api_key

    def run(
        self,
        images: Batch[WorkflowImageData],
        step_execution_mode: StepExecutionMode,
        post_process_result,
    ):
        """OCR each image via TEXT_DETECTION and post-process the results.

        Note: execution is always remote (HTTP); `step_execution_mode` is
        accepted for interface compatibility but not consulted here.

        Raises:
            Exception: if the Vision API responds with a non-200 status.
        """
        predictions = []
        for image_data in images:
            encoded_image = image_data.base64_image
            url = (
                f"https://vision.googleapis.com/v1/images:annotate"
                f"?key={self.google_cloud_api_key}"
            )

            payload = {
                "requests": [
                    {
                        "image": {"content": encoded_image},
                        "features": [{"type": "TEXT_DETECTION"}],
                    }
                ]
            }
            # Send the request; bound it so a stalled connection cannot
            # hang the workflow indefinitely.
            response = requests.post(url, json=payload, timeout=60)
            if response.status_code == 200:
                result = response.json()
                text_annotations = result["responses"][0].get(
                    "textAnnotations",
                    [],
                )
                if text_annotations:
                    # First annotation aggregates the full detected text.
                    text = text_annotations[0]["description"]
                else:
                    text = ""
            else:
                # The error body is usually JSON, but guard against HTML or
                # plain-text error pages so we surface the real failure.
                try:
                    error_info = response.json().get("error", {})
                    message = error_info.get("message", response.text)
                except ValueError:
                    message = response.text
                raise Exception(
                    f"Google Cloud Vision API request failed: {message}",
                )
            prediction = {"result": text}
            predictions.append(prediction)
        return post_process_result(images, predictions)
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
from .base import BaseOCRModel
from inference.core.workflows.core_steps.common.entities import (
StepExecutionMode,
)
from inference.core.workflows.execution_engine.entities.base import (
Batch,
WorkflowImageData,
)
from typing import Optional, List, Callable
from inference.core.workflows.prototypes.block import BlockResult

import requests
import json
import base64


class MathpixOCRModel(BaseOCRModel):
    """OCR wrapper calling the Mathpix `v3/text` REST API (math-aware OCR)."""

    def __init__(
        self,
        model_manager,
        api_key: Optional[str],
        mathpix_app_id: str,
        mathpix_app_key: str,
    ):
        super().__init__(model_manager, api_key)
        # Mathpix uses an app_id/app_key pair, distinct from the platform
        # api_key held by the base class.
        self.mathpix_app_id = mathpix_app_id
        self.mathpix_app_key = mathpix_app_key

    def run(
        self,
        images: Batch[WorkflowImageData],
        step_execution_mode: StepExecutionMode,
        post_process_result: Callable[
            [Batch[WorkflowImageData], List[dict]], BlockResult
        ],
    ) -> BlockResult:
        """OCR each image via Mathpix and post-process the results.

        Note: execution is always remote (HTTP); `step_execution_mode` is
        accepted for interface compatibility but not consulted here.

        Raises:
            Exception: if the Mathpix API responds with a non-200 status.
        """
        predictions = []
        for image_data in images:
            # Decode base64 image to bytes for multipart file upload
            image_bytes = base64.b64decode(image_data.base64_image)

            # Prepare the request
            url = "https://api.mathpix.com/v3/text"
            headers = {
                "app_id": self.mathpix_app_id,
                "app_key": self.mathpix_app_key,
            }
            data = {
                "options_json": json.dumps(
                    {
                        "math_inline_delimiters": ["$", "$"],
                        "rm_spaces": True,
                    }
                )
            }
            files = {"file": ("image.jpg", image_bytes, "image/jpeg")}

            # Send the request; bound it so a stalled connection cannot
            # hang the workflow indefinitely.
            response = requests.post(
                url,
                headers=headers,
                data=data,
                files=files,
                timeout=60,
            )

            if response.status_code == 200:
                result = response.json()
                # Extract the text result
                text = result.get("text", "")
            else:
                # Guard against non-JSON error bodies so the original HTTP
                # failure is reported instead of a JSON decode error.
                try:
                    error_info = response.json().get("error", {})
                    message = error_info.get("message", response.text)
                    detailed_message = error_info.get("detail", "")
                except ValueError:
                    message = response.text
                    detailed_message = ""

                raise Exception(
                    f"Mathpix API request failed: {message} \n\n"
                    f"Detailed: {detailed_message}"
                )

            prediction = {"result": text}
            predictions.append(prediction)

        return post_process_result(images, predictions)
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from typing import Callable, List

from inference.core.entities.requests.trocr import TrOCRInferenceRequest
from inference.core.workflows.core_steps.common.entities import (
StepExecutionMode,
)
from inference.core.workflows.core_steps.common.utils import load_core_model
from inference.core.workflows.execution_engine.entities.base import (
Batch,
WorkflowImageData,
)
from inference.core.workflows.prototypes.block import BlockResult

from .base import BaseOCRModel


class TrOCRModel(BaseOCRModel):
    """OCR wrapper that dispatches TrOCR core-model inference locally or remotely."""

    def run(
        self,
        images: Batch[WorkflowImageData],
        step_execution_mode: StepExecutionMode,
        post_process_result: Callable[
            [Batch[WorkflowImageData], List[dict]], BlockResult
        ],
    ) -> BlockResult:
        """Execute OCR in the requested mode.

        Raises:
            ValueError: if `step_execution_mode` is not a recognized mode.
                (Previously an unknown mode silently returned ``None``.)
        """
        if step_execution_mode is StepExecutionMode.LOCAL:
            return self.run_locally(images, post_process_result)
        elif step_execution_mode is StepExecutionMode.REMOTE:
            return self.run_remotely(images, post_process_result)
        raise ValueError(
            f"Unknown step execution mode: {step_execution_mode}"
        )

    def run_locally(
        self,
        images: Batch[WorkflowImageData],
        post_process_result: Callable[
            [Batch[WorkflowImageData], List[dict]], BlockResult
        ],
    ) -> BlockResult:
        """Run the TrOCR core model in-process for each image in the batch."""
        predictions = []
        for single_image in images:
            inference_request = TrOCRInferenceRequest(
                # numpy_preferred avoids an unnecessary encode/decode round-trip
                image=single_image.to_inference_format(numpy_preferred=True),
                api_key=self.api_key,
            )
            trocr_model_id = load_core_model(
                model_manager=self.model_manager,
                inference_request=inference_request,
                core_model="trocr",
            )
            result = self.model_manager.infer_from_request_sync(
                trocr_model_id, inference_request
            )
            predictions.append(result.model_dump())
        return post_process_result(images, predictions)

    def run_remotely(
        self,
        images: Batch[WorkflowImageData],
        post_process_result: Callable[
            [Batch[WorkflowImageData], List[dict]], BlockResult
        ],
    ) -> BlockResult:
        """Remote execution is not supported for TrOCR."""
        raise NotImplementedError(
            "Remote execution is not implemented for TrOCRModel.",
        )
Loading
Loading