From df3772826c12a8594e9fb92cfa618714606e1d7d Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Wed, 18 Dec 2024 16:47:32 -0600 Subject: [PATCH 01/20] feat: initial TCIAAdapter --- src/mds/agg_mds/adapters.py | 100 ++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/src/mds/agg_mds/adapters.py b/src/mds/agg_mds/adapters.py index 069221d7..3eb0c1b1 100644 --- a/src/mds/agg_mds/adapters.py +++ b/src/mds/agg_mds/adapters.py @@ -1581,6 +1581,105 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]: return results +class TCIAAdapter(RemoteMetadataAdapter): + """ + Simple adapter for TCIA (cancerimagingarchive.net) + """ + + @retry( + stop=stop_after_attempt(5), + retry=retry_if_exception_type(httpx.TimeoutException), + wait=wait_random_exponential(multiplier=1, max=10), + ) + def getRemoteDataAsJson(self, **kwargs) -> Dict: + results = {"results": []} + + mds_url = kwargs.get("mds_url", None) + if mds_url is None: + return results + + try: + response = httpx.get(mds_url) + response.raise_for_status() + + response_data = response.json() + results["results"] = response_data + + except httpx.TimeoutException as exc: + logger.error(f"An timeout error occurred while requesting {mds_url}.") + raise + except httpx.HTTPError as exc: + logger.error( + f"An HTTP error {exc.response.status_code if exc.response is not None else ''} occurred while requesting {exc.request.url}. Returning {len(results['results'])} results" + ) + except Exception as exc: + logger.error( + f"An error occurred while requesting {mds_url} {exc}. Returning {len(results['results'])} results." + ) + + return results + + @staticmethod + def addGen3ExpectedFields( + item, mappings, keepOriginalFields, globalFieldFilters, schema + ): + """ + Map item fields to gen3 normalized fields + using the mapping and adding the location + """ + results = item + if mappings is not None: + mapped_fields = RemoteMetadataAdapter.mapFields( + item, mappings, globalFieldFilters, schema + ) + if keepOriginalFields: + results.update(mapped_fields) + else: + results = mapped_fields + + return results + + def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]: + """ + Iterates over the response. + :param data: + :return: + """ + mappings = kwargs.get("mappings", None) + keepOriginalFields = kwargs.get("keepOriginalFields", False) + globalFieldFilters = kwargs.get("globalFieldFilters", []) + schema = kwargs.get("schema", {}) + + results = {} + for item in data["results"]: + normalized_item = TCIAAdapter.addGen3ExpectedFields( + item, + mappings, + keepOriginalFields, + globalFieldFilters, + schema, + ) + + normalized_item[ + "description" + ] = f"TCIA data from collection: {normalized_item['Collection']}." + + normalized_item["tags"] = [ + { + "name": normalized_item[tag] if normalized_item[tag] else "", + "category": tag, + } + for tag in ["disease_type", "data_type", "primary_site"] + ] + + results[normalized_item["_unique_id"]] = { + "_guid_type": "discovery_metadata", + "gen3_discovery": normalized_item, + } + + return results + + def gather_metadata( gather, mds_url, @@ -1627,6 +1726,7 @@ def gather_metadata( "gdc": GDCAdapter, "cidc": CIDCAdapter, "pdc": PDCAdapter, + "tcia": TCIAAdapter, } From 0e1615fdb00ef4fa65edbb573c90129878d238cc Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 09:20:13 -0600 Subject: [PATCH 02/20] feat: updates for fields --- src/mds/agg_mds/adapters.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mds/agg_mds/adapters.py b/src/mds/agg_mds/adapters.py index 3eb0c1b1..a72d6ace 100644 --- a/src/mds/agg_mds/adapters.py +++ b/src/mds/agg_mds/adapters.py @@ -1662,17 +1662,19 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]: normalized_item[ "description" - ] = f"TCIA data from collection: {normalized_item['Collection']}." + ] = f"TCIA data from collection: {normalized_item['Collection']}, study: {normalized_item['StudyDesc']}." normalized_item["tags"] = [ { "name": normalized_item[tag] if normalized_item[tag] else "", "category": tag, } - for tag in ["disease_type", "data_type", "primary_site"] + for tag in ["Collection", "StudyDesc", "StudyDate"] ] - results[normalized_item["_unique_id"]] = { + unique_id_field = "SeriesInstanceUID" if "SeriesInstanceUID" in normalized_item else "SeriesInstanceUID" + + results[normalized_item[unique_id_field]] = { "_guid_type": "discovery_metadata", "gen3_discovery": normalized_item, } From 4e6f5a9869badc2790f0cb646aeaa5248833b7d2 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 13:16:22 -0600 Subject: [PATCH 03/20] test: add tests for TCIA Adapter --- tests/test_agg_mds_tcia_adapter.py | 95 ++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 tests/test_agg_mds_tcia_adapter.py diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py new file mode 100644 index 00000000..3a4ba2af --- /dev/null +++ b/tests/test_agg_mds_tcia_adapter.py @@ -0,0 +1,95 @@ +import respx +import httpx + +from mds.agg_mds.adapters import get_metadata + + +@respx.mock +def test_get_metadata_tcia(): + pid_response = """ + [ + { + "StudyInstanceUID": "study_id_1", + "StudyDate": "", + "StudyDescription": "", + "PatientAge": "", + "PatientID": "", + "PatientName": "", + "PatientSex": "", + "EthnicGroup": "", + "Collection": "Collection1", + "SeriesCount": 1, + "LongitudinalTemporalEventType": "", + "LongitudinalTemporalOffsetFromEvent": 0 + }, + { + "StudyInstanceUID": "study_id_2", + "StudyDate": "", + "StudyDescription": "", + "PatientAge": "", + "PatientID": "", + "PatientName": "", + "PatientSex": "", + "EthnicGroup": "", + "Collection": "Collection2", + "SeriesCount": 2, + "LongitudinalTemporalEventType": "", + "LongitudinalTemporalOffsetFromEvent": 1 + } + ] + """ + + field_mappings = { + "commons": "TCIA", + "study_title": "path:StudyDescription", + "program_name": "path:Collection", + "description": "", + "tags": [], + } + + respx.post( + "http://test/ok", + ).mock(return_value=httpx.Response(status_code=200, content=pid_response)) + + filters = {"size": 5} + + assert get_metadata("pdc", None, filters=filters, mappings=field_mappings) == {} + + assert get_metadata( + "tcia", "http://test/ok", filters=filters, mappings=field_mappings + ) == { + "data": { + "study_id_1": [ + { + "StudyInstanceUID": "study_id_1", + "StudyDate": "", + "StudyDescription": "", + "PatientAge": "", + "PatientID": "", + "PatientName": "", + "PatientSex": "", + "EthnicGroup": "", + "Collection": "Collection1", + "SeriesCount": 1, + "LongitudinalTemporalEventType": "", + "LongitudinalTemporalOffsetFromEvent": 0, + } + ], + "study_id_2": [ + { + "StudyInstanceUID": "study_id_1", + "StudyDate": "", + "StudyDescription": "", + "PatientAge": "", + "PatientID": "", + "PatientName": "", + "PatientSex": "", + "EthnicGroup": "", + "Collection": "Collection2", + "SeriesCount": 2, + "LongitudinalTemporalEventType": "", + "LongitudinalTemporalOffsetFromEvent": 1, + } + ], + } + } From 2e64eab1526b381b17d5a5b7984d3e97aace7139 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 13:23:32 -0600 Subject: [PATCH 04/20] test: remove test (TCIA doesn't support filters? or does it?) --- tests/test_agg_mds_tcia_adapter.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 3a4ba2af..059e26be 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -53,8 +53,6 @@ def test_get_metadata_tcia(): filters = {"size": 5} - assert get_metadata("pdc", None, filters=filters, mappings=field_mappings) == {} - assert get_metadata( "tcia", "http://test/ok", filters=filters, mappings=field_mappings ) == { From 3d1f466e8ee20f2fa4c385bf328ae4b0c1e2a6b7 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 13:58:02 -0600 Subject: [PATCH 05/20] test: fixes --- tests/test_agg_mds_tcia_adapter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 059e26be..7d467677 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -6,7 +6,7 @@ @respx.mock def test_get_metadata_tcia(): - pid_response = """ + tcia_response = """ [ { "StudyInstanceUID": "study_id_1", @@ -47,12 +47,14 @@ def test_get_metadata_tcia(): "tags": [], } - respx.post( + respx.get( "http://test/ok", - ).mock(return_value=httpx.Response(status_code=200, content=pid_response)) + ).mock(return_value=httpx.Response(status_code=200, content=tcia_response)) filters = {"size": 5} + assert get_metadata("tcia", "http://test/ok", filters=None, config=None) == {} + assert get_metadata( "tcia", "http://test/ok", filters=filters, mappings=field_mappings ) == { From 77aa3ef8268e9bf70f96baa066426362c1440eca Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 14:15:00 -0600 Subject: [PATCH 06/20] test: debugging --- tests/test_agg_mds_tcia_adapter.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 7d467677..187f07b6 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -47,14 +47,24 @@ def test_get_metadata_tcia(): "tags": [], } + respx.get("http://test/ok").mock(side_effect=httpx.HTTPError) + assert ( + get_metadata("tcia", "http://test/ok", filters=None, mappings=field_mappings) + == {} + ) + + respx.get("http://test/ok").mock(side_effect=Exception) + assert ( + get_metadata("tcia", "http://test/ok", filters=None, mappings=field_mappings) + == {} + ) + respx.get( "http://test/ok", ).mock(return_value=httpx.Response(status_code=200, content=tcia_response)) filters = {"size": 5} - assert get_metadata("tcia", "http://test/ok", filters=None, config=None) == {} - assert get_metadata( "tcia", "http://test/ok", filters=filters, mappings=field_mappings ) == { From 45b7a7f4343a7618c037751e67d77deff0cb5dec Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 14:34:02 -0600 Subject: [PATCH 07/20] test: debug --- src/mds/agg_mds/adapters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mds/agg_mds/adapters.py b/src/mds/agg_mds/adapters.py index a72d6ace..c7b90c4f 100644 --- a/src/mds/agg_mds/adapters.py +++ b/src/mds/agg_mds/adapters.py @@ -1662,14 +1662,14 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]: normalized_item[ "description" - ] = f"TCIA data from collection: {normalized_item['Collection']}, study: {normalized_item['StudyDesc']}." + ] = f"TCIA data from collection: {normalized_item['program_name']}, study: {normalized_item['StudyDesc']}." normalized_item["tags"] = [ { "name": normalized_item[tag] if normalized_item[tag] else "", "category": tag, } - for tag in ["Collection", "StudyDesc", "StudyDate"] + for tag in ["program_name", "StudyDesc", "StudyDate"] ] unique_id_field = "SeriesInstanceUID" if "SeriesInstanceUID" in normalized_item else "SeriesInstanceUID" From 61f734595e5b9b68fa1b354e3f93913e6b80e865 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 14:42:13 -0600 Subject: [PATCH 08/20] test: debug --- src/mds/agg_mds/adapters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mds/agg_mds/adapters.py b/src/mds/agg_mds/adapters.py index c7b90c4f..60a0c7d6 100644 --- a/src/mds/agg_mds/adapters.py +++ b/src/mds/agg_mds/adapters.py @@ -1662,14 +1662,14 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]: normalized_item[ "description" - ] = f"TCIA data from collection: {normalized_item['program_name']}, study: {normalized_item['StudyDesc']}." + ] = f"TCIA data from collection: {normalized_item['program_name']}." normalized_item["tags"] = [ { "name": normalized_item[tag] if normalized_item[tag] else "", "category": tag, } - for tag in ["program_name", "StudyDesc", "StudyDate"] + for tag in ["program_name"] ] unique_id_field = "SeriesInstanceUID" if "SeriesInstanceUID" in normalized_item else "SeriesInstanceUID" From 1f92f664dbe5ff274edd577cdf5aaa36448bd5af Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 14:45:17 -0600 Subject: [PATCH 09/20] test: not needed --- src/mds/agg_mds/adapters.py | 4 +--- tests/test_agg_mds_tcia_adapter.py | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mds/agg_mds/adapters.py b/src/mds/agg_mds/adapters.py index 60a0c7d6..d5588a71 100644 --- a/src/mds/agg_mds/adapters.py +++ b/src/mds/agg_mds/adapters.py @@ -1672,9 +1672,7 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]: for tag in ["program_name"] ] - unique_id_field = "SeriesInstanceUID" if "SeriesInstanceUID" in normalized_item else "SeriesInstanceUID" - - results[normalized_item[unique_id_field]] = { + results[normalized_item["_unique_id"]] = { "_guid_type": "discovery_metadata", "gen3_discovery": normalized_item, } diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 187f07b6..b0f656e6 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -40,6 +40,7 @@ def test_get_metadata_tcia(): """ field_mappings = { + "_unique_id": "StudyInstanceUID", "commons": "TCIA", "study_title": "path:StudyDescription", "program_name": "path:Collection", From bfb6d9c176d729645a6d764ef74523ac394d4e82 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 14:53:33 -0600 Subject: [PATCH 10/20] test: update --- tests/test_agg_mds_tcia_adapter.py | 66 +++++++++++++----------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index b0f656e6..215a1867 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -11,7 +11,7 @@ def test_get_metadata_tcia(): { "StudyInstanceUID": "study_id_1", "StudyDate": "", - "StudyDescription": "", + "StudyDescription": "Collection One.", "PatientAge": "", "PatientID": "", "PatientName": "", @@ -25,7 +25,7 @@ def test_get_metadata_tcia(): { "StudyInstanceUID": "study_id_2", "StudyDate": "", - "StudyDescription": "", + "StudyDescription": "Collection Two.", "PatientAge": "", "PatientID": "", "PatientName": "", @@ -40,7 +40,7 @@ def test_get_metadata_tcia(): """ field_mappings = { - "_unique_id": "StudyInstanceUID", + "_unique_id": "path:StudyInstanceUID", "commons": "TCIA", "study_title": "path:StudyDescription", "program_name": "path:Collection", @@ -68,39 +68,29 @@ def test_get_metadata_tcia(): assert get_metadata( "tcia", "http://test/ok", filters=filters, mappings=field_mappings - ) == { - "data": { - "study_id_1": [ - { - "StudyInstanceUID": "study_id_1", - "StudyDate": "", - "StudyDescription": "", - "PatientAge": "", - "PatientID": "", - "PatientName": "", - "PatientSex": "", - "EthnicGroup": "", - "Collection": "Collection1", - "SeriesCount": 1, - "LongitudinalTemporalEventType": "", - "LongitudinalTemporalOffsetFromEvent": 0, - } - ], - "study_id_2": [ - { - "StudyInstanceUID": "study_id_1", - "StudyDate": "", - "StudyDescription": "", - "PatientAge": "", - "PatientID": "", - "PatientName": "", - "PatientSex": "", - "EthnicGroup": "", - "Collection": "Collection2", - "SeriesCount": 2, - "LongitudinalTemporalEventType": "", - "LongitudinalTemporalOffsetFromEvent": 1, - } - ], + ) == [ + { + "study_id_1": { + "_guid_type": "discovery_metadata", + "gen3_discovery": { + "_unique_id": "study_id_1", + "commons": "TCIA", + "description": "TCIA data from collection: Collection1.", + "program_name": "Collection2", + "study_title": "Collection One.", + "tags": [{"category": "program_name", "name": "Collection1"}], + }, + }, + "study_id_2": { + "_guid_type": "discovery_metadata", + "gen3_discovery": { + "_unique_id": "study_id_2", + "commons": "TCIA", + "description": "TCIA data from collection: Collection2.", + "program_name": "Collection2", + "study_title": "Collection Two.", + "tags": [{"category": "program_name", "name": "Collection2"}], + }, + }, } - } + ] From 075f6b31e265e2e92663e595023f17bd86b44160 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 14:56:44 -0600 Subject: [PATCH 11/20] test: fix --- tests/test_agg_mds_tcia_adapter.py | 46 ++++++++++++++---------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 215a1867..5b355997 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -68,29 +68,27 @@ def test_get_metadata_tcia(): assert get_metadata( "tcia", "http://test/ok", filters=filters, mappings=field_mappings - ) == [ - { - "study_id_1": { - "_guid_type": "discovery_metadata", - "gen3_discovery": { - "_unique_id": "study_id_1", - "commons": "TCIA", - "description": "TCIA data from collection: Collection1.", - "program_name": "Collection2", - "study_title": "Collection One.", - "tags": [{"category": "program_name", "name": "Collection1"}], - }, + ) == { + "study_id_1": { + "_guid_type": "discovery_metadata", + "gen3_discovery": { + "_unique_id": "study_id_1", + "commons": "TCIA", + "description": "TCIA data from collection: Collection1.", + "program_name": "Collection2", + "study_title": "Collection One.", + "tags": [{"category": "program_name", "name": "Collection1"}], }, - "study_id_2": { - "_guid_type": "discovery_metadata", - "gen3_discovery": { - "_unique_id": "study_id_2", - "commons": "TCIA", - "description": "TCIA data from collection: Collection2.", - "program_name": "Collection2", - "study_title": "Collection Two.", - "tags": [{"category": "program_name", "name": "Collection2"}], - }, + }, + "study_id_2": { + "_guid_type": "discovery_metadata", + "gen3_discovery": { + "_unique_id": "study_id_2", + "commons": "TCIA", + "description": "TCIA data from collection: Collection2.", + "program_name": "Collection2", + "study_title": "Collection Two.", + "tags": [{"category": "program_name", "name": "Collection2"}], }, - } - ] + }, + } From 8fb5cb80d5fc9652b38b52dfe146dd17179977bf Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Thu, 19 Dec 2024 15:00:34 -0600 Subject: [PATCH 12/20] test: fix --- tests/test_agg_mds_tcia_adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 5b355997..bc26a11b 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -75,7 +75,7 @@ def test_get_metadata_tcia(): "_unique_id": "study_id_1", "commons": "TCIA", "description": "TCIA data from collection: Collection1.", - "program_name": "Collection2", + "program_name": "Collection1", "study_title": "Collection One.", "tags": [{"category": "program_name", "name": "Collection1"}], }, From e73962db59bd681ad2ed1d123e121849014796cb Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Mon, 13 Jan 2025 03:37:06 -0600 Subject: [PATCH 13/20] test: add test for series endpoint --- tests/test_agg_mds_tcia_adapter.py | 115 +++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 6 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index bc26a11b..88f0e5ff 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -6,7 +6,7 @@ @respx.mock def test_get_metadata_tcia(): - tcia_response = """ + tcia_response_study = """ [ { "StudyInstanceUID": "study_id_1", @@ -39,7 +39,7 @@ def test_get_metadata_tcia(): ] """ - field_mappings = { + field_mappings_study = { "_unique_id": "path:StudyInstanceUID", "commons": "TCIA", "study_title": "path:StudyDescription", @@ -48,26 +48,94 @@ def test_get_metadata_tcia(): "tags": [], } + tcia_response_series = """ + [ + { + "SeriesInstanceUID": "series_id_1", + "StudyInstanceUID": "study_id_1", + "Modality": "A", + "ProtocolName": "", + "SeriesDate": "1970-01-01 00:00:00.0", + "SeriesDescription": "", + "SeriesNumber": 1, + "Collection": "Collection1", + "PatientID": "", + "Manufacturer": "", + "ManufacturerModelName": "", + "ImageCount": 10, + "TimeStamp": "1970-01-01 00:00:00.0", + "LicenseName": "", + "LicenseURI": "", + "CollectionURI": "", + "FileSize": 100, + "DateReleased": "1970-01-01 00:00:00.0", + "StudyDesc": "", + "StudyDate": "1970-01-01 00:00:00.0", + "ThirdPartyAnalysis": "" + }, + { + "SeriesInstanceUID": "series_id_2", + "StudyInstanceUID": "study_id_2", + "Modality": "B", + "ProtocolName": "", + "SeriesDate": "1970-01-01 10:00:00.0", + "SeriesDescription": "", + "BodyPartExamined": "", + "SeriesNumber": 2, + "Collection": "Collection2", + "PatientID": "", + "Manufacturer": "", + "ManufacturerModelName": "", + "SoftwareVersions": "", + "ImageCount": 20, + "TimeStamp": "1970-01-01 10:00:00.0", + "LicenseName": "", + "LicenseURI": "", + "CollectionURI": "", + "FileSize": 200, + "DateReleased": "1970-01-01 10:00:00.0", + "StudyDesc": "", + "StudyDate": "1970-01-01 10:00:00.0", + "ThirdPartyAnalysis": "" + } + ] + """ + + field_mappings_series = { + "_unique_id": "path:SeriesInstanceUID", + "study_id": "path:StudyInstanceUID", + "commons": "TCIA", + "study_title": "path:SeriesDescription", + "program_name": "path:Collection", + "image_count": "path:ImageCount", + "description": "", + "tags": [], + } + respx.get("http://test/ok").mock(side_effect=httpx.HTTPError) assert ( - get_metadata("tcia", "http://test/ok", filters=None, mappings=field_mappings) + get_metadata( + "tcia", "http://test/ok", filters=None, mappings=field_mappings_study + ) == {} ) respx.get("http://test/ok").mock(side_effect=Exception) assert ( - get_metadata("tcia", "http://test/ok", filters=None, mappings=field_mappings) + get_metadata( + "tcia", "http://test/ok", filters=None, mappings=field_mappings_study + ) == {} ) respx.get( "http://test/ok", - ).mock(return_value=httpx.Response(status_code=200, content=tcia_response)) + ).mock(return_value=httpx.Response(status_code=200, content=tcia_response_study)) filters = {"size": 5} assert get_metadata( - "tcia", "http://test/ok", filters=filters, mappings=field_mappings + "tcia", "http://test/ok", filters=filters, mappings=field_mappings_study ) == { "study_id_1": { "_guid_type": "discovery_metadata", @@ -92,3 +160,38 @@ def test_get_metadata_tcia(): }, }, } + + respx.get( + "http://test/ok", + ).mock(return_value=httpx.Response(status_code=200, content=tcia_response_series)) + + assert get_metadata( + "tcia", "http://test/ok", filters=filters, mappings=field_mappings_series + ) == { + "series_id_1": { + "_guid_type": "discovery_metadata", + "gen3_discovery": { + "_unique_id": "series_id_1", + "study_id": "study_id_1", + "commons": "TCIA", + "study_title": "", + "program_name": "Collection1", + "image_count": "10", + "description": "", + "tags": [{"category": "program_name", "name": "Collection1"}], + }, + }, + "series_id_2": { + "_guid_type": "discovery_metadata", + "gen3_discovery": { + "_unique_id": "series_id_2", + "study_id": "study_id_2", + "commons": "TCIA", + "study_title": "", + "program_name": "Collection2", + "image_count": "20", + "description": "", + "tags": [{"category": "program_name", "name": "Collection2"}], + }, + }, + } From db6c2613b50aebd79eb8d6fc0ca39c9101f879c1 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Mon, 13 Jan 2025 03:40:36 -0600 Subject: [PATCH 14/20] test: fix --- tests/test_agg_mds_tcia_adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 88f0e5ff..56c11f44 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -177,7 +177,7 @@ def test_get_metadata_tcia(): "study_title": "", "program_name": "Collection1", "image_count": "10", - "description": "", + "description": "TCIA data from collection: Collection1.", "tags": [{"category": "program_name", "name": "Collection1"}], }, }, @@ -190,7 +190,7 @@ def test_get_metadata_tcia(): "study_title": "", "program_name": "Collection2", "image_count": "20", - "description": "", + "description": "TCIA data from collection: Collection2.", "tags": [{"category": "program_name", "name": "Collection2"}], }, }, From 07a80b600b40db88ac8466db47f1af03eded4357 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Mon, 13 Jan 2025 03:44:58 -0600 Subject: [PATCH 15/20] test: fix --- tests/test_agg_mds_tcia_adapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 56c11f44..49a42e2e 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -176,7 +176,7 @@ def test_get_metadata_tcia(): "commons": "TCIA", "study_title": "", "program_name": "Collection1", - "image_count": "10", + "image_count": 10, "description": "TCIA data from collection: Collection1.", "tags": [{"category": "program_name", "name": "Collection1"}], }, @@ -189,7 +189,7 @@ def test_get_metadata_tcia(): "commons": "TCIA", "study_title": "", "program_name": "Collection2", - "image_count": "20", + "image_count": 20, "description": "TCIA data from collection: Collection2.", "tags": [{"category": "program_name", "name": "Collection2"}], }, From 5f196bce41fda867748afd32ac6cecff310e6ac0 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Wed, 15 Jan 2025 15:38:32 -0600 Subject: [PATCH 16/20] fix: pin poetry to pre-2.0 --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 7820b077..9aa3b090 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,8 @@ WORKDIR /${appname} # Builder stage FROM base AS builder +RUN pipx install 'poetry<2.0' + USER gen3 COPY poetry.lock pyproject.toml /${appname}/ From bd914facd7d3b9f9dbd7904882e8447888d8318e Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Wed, 15 Jan 2025 15:53:39 -0600 Subject: [PATCH 17/20] Update AZLINUX_BASE_VERSION argument in Dockerfile --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9aa3b090..53c84c47 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG AZLINUX_BASE_VERSION=master +ARG AZLINUX_BASE_VERSION=m0nhawk-patch-1 # Base stage with python-build-base FROM quay.io/cdis/python-nginx-al:${AZLINUX_BASE_VERSION} AS base @@ -12,8 +12,6 @@ WORKDIR /${appname} # Builder stage FROM base AS builder -RUN pipx install 'poetry<2.0' - USER gen3 COPY poetry.lock pyproject.toml /${appname}/ From 3347918f4e6df685efab82c5d6b51c8351c9b517 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Wed, 15 Jan 2025 16:09:54 -0600 Subject: [PATCH 18/20] Update AZLINUX_BASE_VERSION to master --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 53c84c47..7820b077 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG AZLINUX_BASE_VERSION=m0nhawk-patch-1 +ARG AZLINUX_BASE_VERSION=master # Base stage with python-build-base FROM quay.io/cdis/python-nginx-al:${AZLINUX_BASE_VERSION} AS base From dea672ca7d0ec357189455bb8b0fab02c5e84be7 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Wed, 15 Jan 2025 15:51:44 -0600 Subject: [PATCH 19/20] test: fix --- tests/test_agg_mds_tcia_adapter.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/test_agg_mds_tcia_adapter.py b/tests/test_agg_mds_tcia_adapter.py index 49a42e2e..26f88027 100644 --- a/tests/test_agg_mds_tcia_adapter.py +++ b/tests/test_agg_mds_tcia_adapter.py @@ -13,7 +13,7 @@ def test_get_metadata_tcia(): "StudyDate": "", "StudyDescription": "Collection One.", "PatientAge": "", - "PatientID": "", + "PatientID": "patient_id_1", "PatientName": "", "PatientSex": "", "EthnicGroup": "", @@ -27,7 +27,7 @@ def test_get_metadata_tcia(): "StudyDate": "", "StudyDescription": "Collection Two.", "PatientAge": "", - "PatientID": "", + "PatientID": "patient_id_2", "PatientName": "", "PatientSex": "", "EthnicGroup": "", @@ -44,6 +44,10 @@ def test_get_metadata_tcia(): "commons": "TCIA", "study_title": "path:StudyDescription", "program_name": "path:Collection", + "patient_id": "path:PatientID", + "ethnic_group": "path:EthnicGroup", + "gender": "path:PatientSex", + "series_count": "path:SeriesCount", "description": "", "tags": [], } @@ -144,6 +148,10 @@ def test_get_metadata_tcia(): "commons": "TCIA", "description": "TCIA data from collection: Collection1.", "program_name": "Collection1", + "patient_id": "patient_id_1", + "ethnic_group": "", + "gender": "", + "series_count": 1, "study_title": "Collection One.", "tags": [{"category": "program_name", "name": "Collection1"}], }, @@ -155,6 +163,10 @@ def test_get_metadata_tcia(): "commons": "TCIA", "description": "TCIA data from collection: Collection2.", "program_name": "Collection2", + "patient_id": "patient_id_2", + "ethnic_group": "", + "gender": "", + "series_count": 2, "study_title": "Collection Two.", "tags": [{"category": "program_name", "name": "Collection2"}], }, From fec2b65f2f7b256ed4c045f6d4aba85028750823 Mon Sep 17 00:00:00 2001 From: Andrew Prokhorenkov Date: Fri, 17 Jan 2025 12:51:11 -0600 Subject: [PATCH 20/20] test: reduce fail coverage to 93 for debugging --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b70e7686..f2970ce2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,7 +41,7 @@ jobs: DB_PASSWORD: metadata_pass PGPASSWORD: metadata_pass run: | - poetry run pytest --cov=src --cov=migrations/versions --cov-fail-under=94 --cov-report xml + poetry run pytest --cov=src --cov=migrations/versions --cov-fail-under=93 --cov-report xml - name: Submit coverage report if: github.ref == 'refs/heads/master' env: