From ead724695e5065428f57467a53bf44dfbe7bbb87 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Fri, 21 Jul 2023 08:23:43 +0200 Subject: [PATCH] feat: expand Model API Pydantic types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miguel Brandão <555migalves555@gmail.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Miguel Brandão <555migalves555@gmail.com> --- deepsearch/model/base/controller.py | 36 +++++++---- deepsearch/model/base/model.py | 14 ---- deepsearch/model/base/types.py | 26 ++++++-- .../examples/dummy_nlp_annotator/model.py | 25 ++++---- .../simple_geo_nlp_annotator/model.py | 4 +- deepsearch/model/kinds/nlp/controller.py | 37 ++++++++--- deepsearch/model/kinds/nlp/model.py | 13 ---- deepsearch/model/kinds/nlp/types.py | 64 +++++++++++++++---- deepsearch/model/kinds/qagen/controller.py | 29 +++++++-- deepsearch/model/kinds/qagen/types.py | 28 +++++--- deepsearch/model/server/inference_types.py | 31 ++++++--- deepsearch/model/server/model_app.py | 20 +++--- 12 files changed, 213 insertions(+), 114 deletions(-) diff --git a/deepsearch/model/base/controller.py b/deepsearch/model/base/controller.py index 080e2d59..e3be17ee 100644 --- a/deepsearch/model/base/controller.py +++ b/deepsearch/model/base/controller.py @@ -2,24 +2,34 @@ from typing import Optional from deepsearch.model.base.model import BaseDSModel -from deepsearch.model.base.types import BaseModelConfig -from deepsearch.model.server.inference_types import ControllerInput, ControllerOutput +from deepsearch.model.base.types import BaseModelConfig, BaseModelMetadata +from deepsearch.model.server.inference_types import ( + AppModelInfoOutput, + CtrlPredInput, + CtrlPredOutput, +) class BaseController(ABC): _config: Optional[BaseModelConfig] = None - def get_info(self) -> dict: - model = self._get_model() + @abstractmethod + def get_info(self) -> AppModelInfoOutput: + raise NotImplementedError() + + def _get_api_version(self) -> str: + return "v1" + + def _get_metadata(self) -> BaseModelMetadata: cfg = self._get_config() - result = { # TODO refactor with pydantic - "definitions": { - "apiVersion": "v1", - "kind": cfg.kind, - "spec": model.get_definition_spec(), - } - } - return result + return BaseModelMetadata( + name=cfg.name, + version=cfg.version, + url=cfg.url, + author=cfg.author, + description=cfg.description, + expected_compute_time=cfg.expected_compute_time, + ) def _get_config(self): if self._config is None: @@ -27,7 +37,7 @@ def _get_config(self): return self._config @abstractmethod - def dispatch_predict(self, spec: ControllerInput) -> ControllerOutput: + def dispatch_predict(self, spec: CtrlPredInput) -> CtrlPredOutput: raise NotImplementedError() @abstractmethod diff --git a/deepsearch/model/base/model.py b/deepsearch/model/base/model.py index 8fb1d535..5c519590 100644 --- a/deepsearch/model/base/model.py +++ b/deepsearch/model/base/model.py @@ -7,17 +7,3 @@ class BaseDSModel(ABC): @abstractmethod def get_config(self) -> BaseModelConfig: raise NotImplementedError() - - def get_definition_spec(self) -> dict: - cfg = self.get_config() - spec = { # TODO refactor with pydantic - "metadata": { - "name": cfg.name, - "version": cfg.version, - "url": cfg.url, - "author": cfg.author, - "description": cfg.description, - "expected_compute_time": cfg.expected_compute_time, - }, - } - return spec diff --git a/deepsearch/model/base/types.py b/deepsearch/model/base/types.py index edb89b6f..f0cbacf9 100644 --- a/deepsearch/model/base/types.py +++ b/deepsearch/model/base/types.py @@ -1,6 +1,6 @@ from datetime import datetime from enum import Enum -from typing import Any, Optional +from typing import Any, Dict, Optional from pydantic import BaseModel, Extra, Field, PositiveFloat @@ -34,18 +34,36 @@ class Metadata(StrictModel): annotations: Annotations -class BaseInfReq(StrictModel): +class BaseAppPredInput(StrictModel): apiVersion: str kind: Kind metadata: Metadata spec: Any -class BaseModelConfig(StrictModel): - kind: Kind +class BaseModelMetadata(StrictModel): name: str version: str url: Optional[str] = None author: Optional[str] = None description: Optional[str] = None expected_compute_time: Optional[PositiveFloat] = None + + +class BaseModelConfig(BaseModelMetadata): + kind: Kind + + +class ModelInfoOutputDefsSpec(BaseModel): + definition: Dict + metadata: BaseModelMetadata + + +class CtrlInfoOutputDefs(BaseModel): + apiVersion: str + kind: Kind + spec: ModelInfoOutputDefsSpec + + +class CtrlInfoOutput(BaseModel): + definitions: CtrlInfoOutputDefs diff --git a/deepsearch/model/examples/dummy_nlp_annotator/model.py b/deepsearch/model/examples/dummy_nlp_annotator/model.py index 92f37147..abc3ff89 100644 --- a/deepsearch/model/examples/dummy_nlp_annotator/model.py +++ b/deepsearch/model/examples/dummy_nlp_annotator/model.py @@ -5,6 +5,7 @@ from deepsearch.model.base.types import Kind from deepsearch.model.kinds.nlp.model import BaseNLPModel from deepsearch.model.kinds.nlp.types import ( + AnnotateEntitiesEntry, AnnotateEntitiesOutput, AnnotatePropertiesOutput, AnnotateRelationshipsOutput, @@ -41,18 +42,18 @@ def annotate_batched_entities( results.append( { k: [ - { - "type": k, - "match": f"a '{k}' match in '{item}'", - "original": f"a '{k}' original in '{item}'", - "range": [1, 5], - }, - { - "type": k, - "match": f"another '{k}' match in '{item}'", - "original": f"another '{k}' original in '{item}'", - "range": [12, 42], - }, + AnnotateEntitiesEntry( + type=k, + match=f"a '{k}' match in '{item}'", + original=f"a '{k}' original in '{item}'", + range=[1, 5], + ), + AnnotateEntitiesEntry( + type=k, + match=f"another '{k}' match in '{item}'", + original=f"another '{k}' original in '{item}'", + range=[12, 42], + ), ] for k in _entity_names } diff --git a/deepsearch/model/examples/simple_geo_nlp_annotator/model.py b/deepsearch/model/examples/simple_geo_nlp_annotator/model.py index 47d04585..9621e767 100644 --- a/deepsearch/model/examples/simple_geo_nlp_annotator/model.py +++ b/deepsearch/model/examples/simple_geo_nlp_annotator/model.py @@ -157,7 +157,7 @@ def annotate_batched_entities( def _annotate_entities_in_item( self, object_type: str, item: str, entity_names: Optional[List[str]] - ) -> List[dict]: + ) -> List: # In this case entity_names is never None, however since BaseAnnotator defines the signature of this method as # Optionally having entity names we must ensure that they are defined. if entity_names is None: @@ -200,7 +200,7 @@ def annotate_batched_relationships( if relation in self.relationship_names: result[relation] = self._rel_annots[ relation - ].annotate_relationships_text(text, entity_map) + ].annotate_relationships_text(text, entity_map, relation) if result: results.append(result) diff --git a/deepsearch/model/kinds/nlp/controller.py b/deepsearch/model/kinds/nlp/controller.py index 0c096b27..c98de4b2 100644 --- a/deepsearch/model/kinds/nlp/controller.py +++ b/deepsearch/model/kinds/nlp/controller.py @@ -8,27 +8,48 @@ FindEntitiesText, FindPropertiesText, FindRelationshipsText, - NLPEntitiesControllerOutput, NLPEntitiesReqSpec, - NLPPropertiesControllerOutput, + NLPEntsCtrlPredOuput, + NLPInfoOutput, + NLPInfoOutputDefinitions, + NLPInfoOutputDefinitionsSpec, + NLPModelMetadata, NLPPropertiesReqSpec, - NLPRelationshipsControllerOutput, + NLPPropsCtrlPredOutput, NLPRelationshipsReqSpec, + NLPRelsCtrlPredOutput, ) -from deepsearch.model.server.inference_types import ControllerInput, ControllerOutput +from deepsearch.model.server.inference_types import CtrlPredInput, CtrlPredOutput class NLPController(BaseController): def __init__(self, model: BaseNLPModel): self._model = model + def get_info(self) -> NLPInfoOutput: + cfg = self._model.get_nlp_config() + metadata = NLPModelMetadata( + supported_object_types=cfg.supported_types, + **self._get_metadata().dict(), # passing parent metadata dict as kwargs + ) + spec = NLPInfoOutputDefinitionsSpec( + definition=cfg.labels, + metadata=metadata, + ) + definitions = NLPInfoOutputDefinitions( + apiVersion=self._get_api_version(), + kind=self.get_kind(), + spec=spec, + ) + return NLPInfoOutput(definitions=definitions) + def get_kind(self) -> str: return Kind.NLPModel def _get_model(self) -> BaseDSModel: return self._model - def dispatch_predict(self, spec: ControllerInput) -> ControllerOutput: + def dispatch_predict(self, spec: CtrlPredInput) -> CtrlPredOutput: cfg = self._model.get_nlp_config() type_ok = True @@ -43,7 +64,7 @@ def dispatch_predict(self, spec: ControllerInput) -> ControllerOutput: items=spec.findEntities.texts, entity_names=spec.findEntities.entityNames, ) - return NLPEntitiesControllerOutput(entities=entities) + return NLPEntsCtrlPredOuput(entities=entities) elif ( isinstance(spec, NLPRelationshipsReqSpec) and isinstance(spec.findRelationships, FindRelationshipsText) @@ -55,7 +76,7 @@ def dispatch_predict(self, spec: ControllerInput) -> ControllerOutput: entities=spec.findRelationships.entities, relationship_names=spec.findRelationships.relationshipNames, ) - return NLPRelationshipsControllerOutput(relationships=relationships) + return NLPRelsCtrlPredOutput(relationships=relationships) elif ( isinstance(spec, NLPPropertiesReqSpec) and isinstance(spec.findProperties, FindPropertiesText) @@ -71,7 +92,7 @@ def dispatch_predict(self, spec: ControllerInput) -> ControllerOutput: entities=entities, property_names=spec.findProperties.propertyNames, ) - return NLPPropertiesControllerOutput(properties=properties) + return NLPPropsCtrlPredOutput(properties=properties) elif not type_ok: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, diff --git a/deepsearch/model/kinds/nlp/model.py b/deepsearch/model/kinds/nlp/model.py index 08d846b0..bcacd12e 100644 --- a/deepsearch/model/kinds/nlp/model.py +++ b/deepsearch/model/kinds/nlp/model.py @@ -1,5 +1,4 @@ from abc import abstractmethod -from copy import deepcopy from typing import List, Optional from deepsearch.model.base.model import BaseDSModel @@ -13,8 +12,6 @@ class BaseNLPModel(BaseDSModel): - _cached_def_spec: dict = {} - @abstractmethod def annotate_batched_entities( self, @@ -44,16 +41,6 @@ def annotate_batched_properties( ) -> AnnotatePropertiesOutput: raise NotImplementedError() - def get_definition_spec(self) -> dict: - cfg = self.get_nlp_config() - if not self._cached_def_spec: - self._cached_def_spec = deepcopy(super().get_definition_spec()) - self._cached_def_spec["definition"] = cfg.labels - self._cached_def_spec["metadata"][ - "supported_object_types" - ] = cfg.supported_types - return self._cached_def_spec - @abstractmethod def get_nlp_config(self) -> NLPConfig: raise NotImplementedError() diff --git a/deepsearch/model/kinds/nlp/types.py b/deepsearch/model/kinds/nlp/types.py index ef1de63e..ed4a57cc 100644 --- a/deepsearch/model/kinds/nlp/types.py +++ b/deepsearch/model/kinds/nlp/types.py @@ -1,7 +1,16 @@ from enum import Enum -from typing import List, Literal, Optional, Union +from typing import Dict, List, Literal, Optional, Union -from deepsearch.model.base.types import BaseInfReq, BaseModelConfig, Kind, StrictModel +from deepsearch.model.base.types import ( + BaseAppPredInput, + BaseModelConfig, + BaseModelMetadata, + CtrlInfoOutput, + CtrlInfoOutputDefs, + Kind, + ModelInfoOutputDefsSpec, + StrictModel, +) class NLPType(str, Enum): @@ -48,7 +57,7 @@ class NLPRelationshipsReqSpec(StrictModel): ] -class NLPRequest(BaseInfReq): +class NLPAppPredInput(BaseAppPredInput): kind: Literal[Kind.NLPModel] spec: NLPReqSpec @@ -80,29 +89,39 @@ class AnnotationLabels(StrictModel): properties: List[PropertyLabel] -# TODO Annotate*Input pydantic models needed? +class AnnotateEntitiesEntry(StrictModel): + type: str + match: str + original: str + range: List[int] -AnnotateEntitiesOutput = List[dict] # TODO provide real implementation -AnnotateRelationshipsOutput = List[dict] # TODO provide real implementation -AnnotatePropertiesOutput = List[dict] # TODO provide real implementation +class AnnotateRelationshipsEntry(StrictModel): + header: list + data: list -class NLPEntitiesControllerOutput(StrictModel): + +AnnotateEntitiesOutput = List[Dict[str, List[AnnotateEntitiesEntry]]] +AnnotateRelationshipsOutput = List[Dict[str, AnnotateRelationshipsEntry]] +AnnotatePropertiesOutput = List[Dict] # TODO specify + + +class NLPEntsCtrlPredOuput(StrictModel): entities: AnnotateEntitiesOutput -class NLPRelationshipsControllerOutput(StrictModel): +class NLPRelsCtrlPredOutput(StrictModel): relationships: AnnotateRelationshipsOutput -class NLPPropertiesControllerOutput(StrictModel): +class NLPPropsCtrlPredOutput(StrictModel): properties: AnnotatePropertiesOutput -NLPControllerOutput = Union[ - NLPEntitiesControllerOutput, - NLPRelationshipsControllerOutput, - NLPPropertiesControllerOutput, +NLPCtrlPredOutput = Union[ + NLPEntsCtrlPredOuput, + NLPRelsCtrlPredOutput, + NLPPropsCtrlPredOutput, ] @@ -110,3 +129,20 @@ class NLPConfig(BaseModelConfig): kind: Literal[Kind.NLPModel] supported_types: List[NLPType] labels: AnnotationLabels + + +class NLPModelMetadata(BaseModelMetadata): + supported_object_types: List[Literal["text", "table", "image"]] + + +class NLPInfoOutputDefinitionsSpec(ModelInfoOutputDefsSpec): + metadata: NLPModelMetadata + + +class NLPInfoOutputDefinitions(CtrlInfoOutputDefs): + kind: Literal[Kind.NLPModel] + spec: NLPInfoOutputDefinitionsSpec + + +class NLPInfoOutput(CtrlInfoOutput): + definitions: NLPInfoOutputDefinitions diff --git a/deepsearch/model/kinds/qagen/controller.py b/deepsearch/model/kinds/qagen/controller.py index 976c3598..d0be4836 100644 --- a/deepsearch/model/kinds/qagen/controller.py +++ b/deepsearch/model/kinds/qagen/controller.py @@ -1,32 +1,47 @@ -from __future__ import annotations - from fastapi import HTTPException, status from deepsearch.model.base.controller import BaseController from deepsearch.model.base.model import BaseDSModel -from deepsearch.model.base.types import Kind +from deepsearch.model.base.types import Kind, ModelInfoOutputDefsSpec from deepsearch.model.kinds.qagen.model import BaseQAGenerator -from deepsearch.model.kinds.qagen.types import QAGenControllerOutput, QAGenReqSpec -from deepsearch.model.server.inference_types import ControllerInput, ControllerOutput +from deepsearch.model.kinds.qagen.types import ( + QAGenCtrlPredOutput, + QAGenInfoOutput, + QAGenInfoOutputDefinitions, + QAGenReqSpec, +) +from deepsearch.model.server.inference_types import CtrlPredInput, CtrlPredOutput class QAGenController(BaseController): def __init__(self, model: BaseQAGenerator): self._model = model + def get_info(self) -> QAGenInfoOutput: + spec = ModelInfoOutputDefsSpec( + definition={}, + metadata=self._get_metadata(), + ) + definitions = QAGenInfoOutputDefinitions( + apiVersion=super()._get_api_version(), + kind=self.get_kind(), + spec=spec, + ) + return QAGenInfoOutput(definitions=definitions) + def _get_model(self) -> BaseDSModel: return self._model def get_kind(self) -> str: return Kind.QAGenModel - def dispatch_predict(self, spec: ControllerInput) -> ControllerOutput: + def dispatch_predict(self, spec: CtrlPredInput) -> CtrlPredOutput: if isinstance(spec, QAGenReqSpec): gen_answers = spec.generateAnswers answers = self._model.generate_answers( [(c, q) for c, q in zip(gen_answers.contexts, gen_answers.questions)] ) - return QAGenControllerOutput( + return QAGenCtrlPredOutput( answers=answers, ) else: diff --git a/deepsearch/model/kinds/qagen/types.py b/deepsearch/model/kinds/qagen/types.py index a35fc096..451bae3e 100644 --- a/deepsearch/model/kinds/qagen/types.py +++ b/deepsearch/model/kinds/qagen/types.py @@ -2,10 +2,16 @@ from pydantic import root_validator -from deepsearch.model.base.types import BaseInfReq, BaseModelConfig, Kind, StrictModel +from deepsearch.model.base.types import ( + BaseAppPredInput, + BaseModelConfig, + CtrlInfoOutputDefs, + Kind, + StrictModel, +) -class GenerateAnswers(StrictModel): # TODO rename? +class GenerateAnswers(StrictModel): contexts: List[List[str]] questions: List[str] @@ -18,22 +24,28 @@ def check_lengths_match(cls, values): class QAGenReqSpec(StrictModel): - generateAnswers: GenerateAnswers # TODO rename? + generateAnswers: GenerateAnswers -class QAGenRequest(BaseInfReq): +class QAGenAppPredInput(BaseAppPredInput): kind: Literal[Kind.QAGenModel] spec: QAGenReqSpec -# TODO GenerateAnswersInput pydantic model needed? +GenerateAnswersOutput = List[str] -GenerateAnswersOutput = List[str] # TODO provide real implementation - -class QAGenControllerOutput(StrictModel): +class QAGenCtrlPredOutput(StrictModel): answers: GenerateAnswersOutput class QAGenConfig(BaseModelConfig): kind: Literal[Kind.QAGenModel] + + +class QAGenInfoOutputDefinitions(CtrlInfoOutputDefs): + kind: Literal[Kind.QAGenModel] + + +class QAGenInfoOutput(StrictModel): + definitions: QAGenInfoOutputDefinitions diff --git a/deepsearch/model/server/inference_types.py b/deepsearch/model/server/inference_types.py index 0e2e074b..0e936d8f 100644 --- a/deepsearch/model/server/inference_types.py +++ b/deepsearch/model/server/inference_types.py @@ -1,23 +1,34 @@ from typing import Union -from deepsearch.model.kinds.nlp.types import NLPControllerOutput, NLPReqSpec, NLPRequest +from deepsearch.model.kinds.nlp.types import ( + NLPAppPredInput, + NLPCtrlPredOutput, + NLPInfoOutput, + NLPReqSpec, +) from deepsearch.model.kinds.qagen.types import ( - QAGenControllerOutput, + QAGenAppPredInput, + QAGenCtrlPredOutput, + QAGenInfoOutput, QAGenReqSpec, - QAGenRequest, ) -AppInferenceInput = Union[ - NLPRequest, - QAGenRequest, +AppPredInput = Union[ + NLPAppPredInput, + QAGenAppPredInput, ] -ControllerInput = Union[ +CtrlPredInput = Union[ NLPReqSpec, QAGenReqSpec, ] -ControllerOutput = Union[ - NLPControllerOutput, - QAGenControllerOutput, +CtrlPredOutput = Union[ + NLPCtrlPredOutput, + QAGenCtrlPredOutput, +] + +AppModelInfoOutput = Union[ + NLPInfoOutput, + QAGenInfoOutput, ] diff --git a/deepsearch/model/server/model_app.py b/deepsearch/model/server/model_app.py index 47da31e6..3f8a69f9 100644 --- a/deepsearch/model/server/model_app.py +++ b/deepsearch/model/server/model_app.py @@ -18,7 +18,7 @@ from deepsearch.model.base.model import BaseDSModel from deepsearch.model.server.config import Settings from deepsearch.model.server.controller_factory import ControllerFactory -from deepsearch.model.server.inference_types import AppInferenceInput +from deepsearch.model.server.inference_types import AppModelInfoOutput, AppPredInput logger = logging.getLogger("cps-fastapi") @@ -50,20 +50,24 @@ async def health_check() -> dict: return {"message": "HealthCheck"} @self.app.get("/") - async def get_definitions(api_key=Depends(self._auth)) -> dict: + async def get_definitions( + api_key=Depends(self._auth), + ) -> Dict[str, AppModelInfoOutput]: return { name: controller.get_info() for name, controller in self._controllers.items() } @self.app.get("/model/{model_name}") - async def get_model_specs(model_name: str, api_key=Depends(self._auth)) -> dict: + async def get_model_specs( + model_name: str, api_key=Depends(self._auth) + ) -> AppModelInfoOutput: controller = self._get_controller(model_name=model_name) return controller.get_info() @self.app.post("/model/{model_name}/predict", response_model=None) async def predict( - model_name: str, request: AppInferenceInput, api_key=Depends(self._auth) + model_name: str, request: AppPredInput, api_key=Depends(self._auth) ) -> JSONResponse: request_arrival_time = time.time() try: @@ -124,9 +128,7 @@ async def predict( async def _run_in_process(fn, *args) -> JSONResponse: return await run_in_threadpool(fn, *args) - def _inference_process( - model_name: str, request: AppInferenceInput - ) -> JSONResponse: + def _inference_process(model_name: str, request: AppPredInput) -> JSONResponse: request_dict = request.dict() start_time = time.time() controller = self._get_controller(model_name=model_name) @@ -174,7 +176,7 @@ def register_model( model: BaseDSModel, name: Optional[str] = None, controller: Optional[BaseController] = None, - ): + ) -> None: """Registers a model with the app. Args: @@ -197,7 +199,7 @@ def _validate_controller_kind( raise RuntimeError("Controller kind does not match model") def _validate_request_kind( - self, request: AppInferenceInput, controller: BaseController + self, request: AppPredInput, controller: BaseController ) -> None: if request.kind != controller.get_kind(): raise HTTPException(