From 4c6208a2d0c96edb9bf2dc84aa5e219d5516ad4c Mon Sep 17 00:00:00 2001 From: Adriano Rutz Date: Thu, 14 Mar 2024 00:26:38 +0100 Subject: [PATCH] #25 Returning SDF outside the structure object --- api/api.py | 10 +++++++++- api/models.py | 2 +- api/queries.py | 4 +--- doc/api/schemas/structureObject.yaml | 5 ----- doc/api/schemas/structureResult.yaml | 5 +++++ model/data_model.py | 6 ------ tests/test_api_structures.py | 17 +++++++++++++++-- 7 files changed, 31 insertions(+), 18 deletions(-) diff --git a/api/api.py b/api/api.py index fd7a295..3478018 100644 --- a/api/api.py +++ b/api/api.py @@ -118,7 +118,15 @@ async def search_structures( ) -> StructureResult: dict_items = get_structures_for_item(item, dm) - if item.modeEnum == "objects": + if item.structure.option.sdf: + return StructureResult( + ids=dict_items.keys(), + objects={sid: value for sid, value in dict_items.items()}, + sdf=dm.get_structure_sdf_from_dict_of_sids(dict_items), + description="Structures matching the query", + count=len(dict_items), + ) + elif item.modeEnum == "objects": return StructureResult( ids=dict_items.keys(), objects={sid: value for sid, value in dict_items.items()}, diff --git a/api/models.py b/api/models.py index ba5bf01..8c7b990 100644 --- a/api/models.py +++ b/api/models.py @@ -110,12 +110,12 @@ class StructureObject(BaseModel): inchikey_no_stereo: str formula: str descriptors: Optional[Dict] = None - sdf: Optional[str] = None class StructureResult(BaseModel): ids: List[int] objects: Optional[Dict[int, StructureObject]] = None + sdf: Optional[str] = None count: Optional[int] description: Optional[str] diff --git a/api/queries.py b/api/queries.py index 5ad599e..926737e 100644 --- a/api/queries.py +++ b/api/queries.py @@ -137,8 +137,6 @@ def structures_from_structure_in_item(dm: DataModel, item: Item) -> set[int] | N detail=f"The formula given is invalid: {formula}", ) - # TODO if sdf - # TODO if desc(riptors) return structures @@ -278,7 +276,7 @@ def get_structures_for_item(item: Item, dm: DataModel) -> dict[int, str]: ) return dm.get_structure_object_from_dict_of_sids( - ids, item.structure.option.descriptors, item.structure.option.sdf + ids, item.structure.option.descriptors ) diff --git a/doc/api/schemas/structureObject.yaml b/doc/api/schemas/structureObject.yaml index 95d1b35..cccafa7 100644 --- a/doc/api/schemas/structureObject.yaml +++ b/doc/api/schemas/structureObject.yaml @@ -30,11 +30,6 @@ properties: Molecular formula example: "C16H22O9" type: string - sdf: - description: | - SDF - example: "\n RDKit 2D\n\n 1 0 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\nM END\n> (3) \n3\n\n" - type: string smiles: description: | SMILES (with stereochemistry) diff --git a/doc/api/schemas/structureResult.yaml b/doc/api/schemas/structureResult.yaml index 8a35d8e..3923f25 100644 --- a/doc/api/schemas/structureResult.yaml +++ b/doc/api/schemas/structureResult.yaml @@ -11,6 +11,11 @@ properties: type: array objects: $ref: "./structureObject.yaml" + sdf: + description: | + SDF + example: "\n RDKit 2D\n\n 1 0 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\nM END\n> (3) \n3\n\n" + type: string count: description: | Count diff --git a/model/data_model.py b/model/data_model.py index bf338b4..b6d5796 100644 --- a/model/data_model.py +++ b/model/data_model.py @@ -166,7 +166,6 @@ def get_structure_object_from_dict_of_sids( self, sids: Iterable[int], descriptors: bool | dict = False, - sdf: bool = False, ) -> dict[int, StructureObject]: with self.storage.session() as session: if descriptors == True: @@ -200,10 +199,6 @@ def get_structure_object_from_dict_of_sids( .filter(Structures.id.in_(sids)) .all() ) - if sdf: - blocks = self.get_structure_sdf_from_dict_of_sids(sids) - else: - blocks = None if result: return { row.id: StructureObject( @@ -214,7 +209,6 @@ def get_structure_object_from_dict_of_sids( inchikey=row.inchikey, inchikey_no_stereo=row.inchikey_no_stereo, formula=row.formula, - sdf=blocks, ) for row in result } diff --git a/tests/test_api_structures.py b/tests/test_api_structures.py index e7861c5..9c69f0c 100644 --- a/tests/test_api_structures.py +++ b/tests/test_api_structures.py @@ -115,11 +115,24 @@ async def test_search_structures_sdf(self, data_model): result = await search_structures(item=item, dm=data_model) assert result.count == 1 assert ( - result.objects[3].sdf + result.sdf == "\n RDKit 2D\n\n 1 0 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\nM END\n> (3) \n3\n\n" ) - # TODO test for multiple ones (SDF) + async def test_search_structures_sdf_multiple(self, data_model): + item = Item( + structure={ + "molecule": "C([H])([H])([H])", + "option": {"sdf": True, "substructure_search": True}, + }, + limit=10, + modeEnum="objects", + ) + result = await search_structures(item=item, dm=data_model) + assert ( + result.sdf + == "\n RDKit 2D\n\n 4 3 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.5981 -0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 2.2500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 2 1 1 1\n 2 3 1 0\n 2 4 1 0\nM END\n> (1) \n1\n\n\n RDKit 2D\n\n 4 3 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 0.7500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.5981 -0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 2.2500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 2 1 1 6\n 2 3 1 0\n 2 4 1 0\nM END\n> (2) \n2\n\n\n RDKit 2D\n\n 1 0 0 0 0 0 0 0 0 0999 V2000\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\nM END\n> (3) \n3\n\n" + ) async def test_search_structures_by_substructure_limits(self, data_model): item = Item(