Skip to content

Commit

Permalink
Merge pull request #121 from uc-cdis/feat/tciaadapter
Browse files Browse the repository at this point in the history
TCIA Adapter
  • Loading branch information
m0nhawk authored Jan 21, 2025
2 parents 865f830 + fec2b65 commit 05a7153
Show file tree
Hide file tree
Showing 3 changed files with 310 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
DB_PASSWORD: metadata_pass
PGPASSWORD: metadata_pass
run: |
poetry run pytest --cov=src --cov=migrations/versions --cov-fail-under=94 --cov-report xml
poetry run pytest --cov=src --cov=migrations/versions --cov-fail-under=93 --cov-report xml
- name: Submit coverage report
if: github.ref == 'refs/heads/master'
env:
Expand Down
100 changes: 100 additions & 0 deletions src/mds/agg_mds/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1581,6 +1581,105 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]:
return results


class TCIAAdapter(RemoteMetadataAdapter):
    """
    Simple adapter for TCIA (cancerimagingarchive.net)

    Downloads a JSON document from ``mds_url`` and reshapes every entry into
    a ``discovery_metadata`` record keyed by the entry's ``_unique_id``.
    """

    @retry(
        stop=stop_after_attempt(5),
        retry=retry_if_exception_type(httpx.TimeoutException),
        wait=wait_random_exponential(multiplier=1, max=10),
    )
    def getRemoteDataAsJson(self, **kwargs) -> Dict:
        """
        Fetch the remote payload.

        :param kwargs: only ``mds_url`` is read here.
        :return: ``{"results": <decoded JSON body>}``. ``results`` stays an
            empty list when no ``mds_url`` is given or when a non-timeout
            error occurs (the error is logged, not raised).
        :raises httpx.TimeoutException: re-raised after logging so that the
            ``retry`` decorator can attempt the request again.
        """
        results = {"results": []}

        mds_url = kwargs.get("mds_url", None)
        if mds_url is None:
            return results

        try:
            response = httpx.get(mds_url)
            response.raise_for_status()

            response_data = response.json()
            results["results"] = response_data

        except httpx.TimeoutException:
            logger.error(f"An timeout error occurred while requesting {mds_url}.")
            raise
        except httpx.HTTPError as exc:
            # Only httpx.HTTPStatusError carries a ``response`` attribute, and
            # ``exc.request`` raises RuntimeError when no request was attached.
            # Read the response defensively and log the URL we already know so
            # that this handler cannot itself fail on transport-level errors.
            failed_response = getattr(exc, "response", None)
            status_code = (
                failed_response.status_code if failed_response is not None else ""
            )
            logger.error(
                f"An HTTP error {status_code} occurred while requesting {mds_url}. Returning {len(results['results'])} results"
            )
        except Exception as exc:
            logger.error(
                f"An error occurred while requesting {mds_url} {exc}. Returning {len(results['results'])} results."
            )

        return results

    @staticmethod
    def addGen3ExpectedFields(
        item, mappings, keepOriginalFields, globalFieldFilters, schema
    ):
        """
        Map item fields to gen3 normalized fields using the mapping.

        :param item: one entry of the remote response.
        :param mappings: field mappings handed to
            ``RemoteMetadataAdapter.mapFields``; when ``None`` the item is
            returned untouched.
        :param keepOriginalFields: when truthy the mapped fields are merged
            into ``item`` (which is updated in place); otherwise only the
            mapped fields are returned.
        :param globalFieldFilters: forwarded to ``mapFields``.
        :param schema: forwarded to ``mapFields``.
        :return: the normalized item.
        """
        results = item
        if mappings is not None:
            mapped_fields = RemoteMetadataAdapter.mapFields(
                item, mappings, globalFieldFilters, schema
            )
            if keepOriginalFields:
                results.update(mapped_fields)
            else:
                results = mapped_fields

        return results

    def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]:
        """
        Iterates over the response and builds one discovery record per entry.

        :param data: dict whose ``"results"`` value is iterated.
        :param kwargs: ``mappings``, ``keepOriginalFields``,
            ``globalFieldFilters`` and ``schema`` are read, each with a
            default.
        :return: dict keyed by each normalized item's ``_unique_id``; every
            value has ``_guid_type`` set to ``"discovery_metadata"`` and the
            normalized item under ``gen3_discovery``.
        """
        mappings = kwargs.get("mappings", None)
        keepOriginalFields = kwargs.get("keepOriginalFields", False)
        globalFieldFilters = kwargs.get("globalFieldFilters", [])
        schema = kwargs.get("schema", {})

        results = {}
        for item in data["results"]:
            normalized_item = TCIAAdapter.addGen3ExpectedFields(
                item,
                mappings,
                keepOriginalFields,
                globalFieldFilters,
                schema,
            )

            # The description is always derived from the collection name,
            # overriding whatever the mapping produced for "description".
            normalized_item[
                "description"
            ] = f"TCIA data from collection: {normalized_item['program_name']}."

            # One tag per listed field; falsy values become an empty name.
            normalized_item["tags"] = [
                {
                    "name": normalized_item[tag] if normalized_item[tag] else "",
                    "category": tag,
                }
                for tag in ["program_name"]
            ]

            results[normalized_item["_unique_id"]] = {
                "_guid_type": "discovery_metadata",
                "gen3_discovery": normalized_item,
            }

        return results


def gather_metadata(
gather,
mds_url,
Expand Down Expand Up @@ -1627,6 +1726,7 @@ def gather_metadata(
"gdc": GDCAdapter,
"cidc": CIDCAdapter,
"pdc": PDCAdapter,
"tcia": TCIAAdapter,
}


Expand Down
209 changes: 209 additions & 0 deletions tests/test_agg_mds_tcia_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
import respx
import httpx

from mds.agg_mds.adapters import get_metadata


@respx.mock
def test_get_metadata_tcia():
    """
    Drive the "tcia" adapter through get_metadata against a mocked endpoint.

    Checks that request failures yield an empty dict, then that a study
    payload and a series payload are each normalized into discovery records
    keyed by their unique id.
    """
    url = "http://test/ok"

    def discovery(record):
        # Wrap a normalized record in the envelope the adapter emits.
        return {"_guid_type": "discovery_metadata", "gen3_discovery": record}

    study_payload = """
[
{
"StudyInstanceUID": "study_id_1",
"StudyDate": "",
"StudyDescription": "Collection One.",
"PatientAge": "",
"PatientID": "patient_id_1",
"PatientName": "",
"PatientSex": "",
"EthnicGroup": "",
"Collection": "Collection1",
"SeriesCount": 1,
"LongitudinalTemporalEventType": "",
"LongitudinalTemporalOffsetFromEvent": 0
},
{
"StudyInstanceUID": "study_id_2",
"StudyDate": "",
"StudyDescription": "Collection Two.",
"PatientAge": "",
"PatientID": "patient_id_2",
"PatientName": "",
"PatientSex": "",
"EthnicGroup": "",
"Collection": "Collection2",
"SeriesCount": 2,
"LongitudinalTemporalEventType": "",
"LongitudinalTemporalOffsetFromEvent": 1
}
]
"""

    study_mappings = {
        "_unique_id": "path:StudyInstanceUID",
        "commons": "TCIA",
        "study_title": "path:StudyDescription",
        "program_name": "path:Collection",
        "patient_id": "path:PatientID",
        "ethnic_group": "path:EthnicGroup",
        "gender": "path:PatientSex",
        "series_count": "path:SeriesCount",
        "description": "",
        "tags": [],
    }

    series_payload = """
[
{
"SeriesInstanceUID": "series_id_1",
"StudyInstanceUID": "study_id_1",
"Modality": "A",
"ProtocolName": "",
"SeriesDate": "1970-01-01 00:00:00.0",
"SeriesDescription": "",
"SeriesNumber": 1,
"Collection": "Collection1",
"PatientID": "",
"Manufacturer": "",
"ManufacturerModelName": "",
"ImageCount": 10,
"TimeStamp": "1970-01-01 00:00:00.0",
"LicenseName": "",
"LicenseURI": "",
"CollectionURI": "",
"FileSize": 100,
"DateReleased": "1970-01-01 00:00:00.0",
"StudyDesc": "",
"StudyDate": "1970-01-01 00:00:00.0",
"ThirdPartyAnalysis": ""
},
{
"SeriesInstanceUID": "series_id_2",
"StudyInstanceUID": "study_id_2",
"Modality": "B",
"ProtocolName": "",
"SeriesDate": "1970-01-01 10:00:00.0",
"SeriesDescription": "",
"BodyPartExamined": "",
"SeriesNumber": 2,
"Collection": "Collection2",
"PatientID": "",
"Manufacturer": "",
"ManufacturerModelName": "",
"SoftwareVersions": "",
"ImageCount": 20,
"TimeStamp": "1970-01-01 10:00:00.0",
"LicenseName": "",
"LicenseURI": "",
"CollectionURI": "",
"FileSize": 200,
"DateReleased": "1970-01-01 10:00:00.0",
"StudyDesc": "",
"StudyDate": "1970-01-01 10:00:00.0",
"ThirdPartyAnalysis": ""
}
]
"""

    series_mappings = {
        "_unique_id": "path:SeriesInstanceUID",
        "study_id": "path:StudyInstanceUID",
        "commons": "TCIA",
        "study_title": "path:SeriesDescription",
        "program_name": "path:Collection",
        "image_count": "path:ImageCount",
        "description": "",
        "tags": [],
    }

    # Both failure modes must leave the caller with an empty result.
    for failure in (httpx.HTTPError, Exception):
        respx.get(url).mock(side_effect=failure)
        harvested = get_metadata("tcia", url, filters=None, mappings=study_mappings)
        assert harvested == {}

    filters = {"size": 5}

    # Study-level payload.
    respx.get(url).mock(
        return_value=httpx.Response(status_code=200, content=study_payload)
    )
    harvested_studies = get_metadata(
        "tcia", url, filters=filters, mappings=study_mappings
    )
    expected_studies = {
        "study_id_1": discovery(
            {
                "_unique_id": "study_id_1",
                "commons": "TCIA",
                "description": "TCIA data from collection: Collection1.",
                "program_name": "Collection1",
                "patient_id": "patient_id_1",
                "ethnic_group": "",
                "gender": "",
                "series_count": 1,
                "study_title": "Collection One.",
                "tags": [{"category": "program_name", "name": "Collection1"}],
            }
        ),
        "study_id_2": discovery(
            {
                "_unique_id": "study_id_2",
                "commons": "TCIA",
                "description": "TCIA data from collection: Collection2.",
                "program_name": "Collection2",
                "patient_id": "patient_id_2",
                "ethnic_group": "",
                "gender": "",
                "series_count": 2,
                "study_title": "Collection Two.",
                "tags": [{"category": "program_name", "name": "Collection2"}],
            }
        ),
    }
    assert harvested_studies == expected_studies

    # Series-level payload served from the same URL.
    respx.get(url).mock(
        return_value=httpx.Response(status_code=200, content=series_payload)
    )
    harvested_series = get_metadata(
        "tcia", url, filters=filters, mappings=series_mappings
    )
    expected_series = {
        "series_id_1": discovery(
            {
                "_unique_id": "series_id_1",
                "study_id": "study_id_1",
                "commons": "TCIA",
                "study_title": "",
                "program_name": "Collection1",
                "image_count": 10,
                "description": "TCIA data from collection: Collection1.",
                "tags": [{"category": "program_name", "name": "Collection1"}],
            }
        ),
        "series_id_2": discovery(
            {
                "_unique_id": "series_id_2",
                "study_id": "study_id_2",
                "commons": "TCIA",
                "study_title": "",
                "program_name": "Collection2",
                "image_count": 20,
                "description": "TCIA data from collection: Collection2.",
                "tags": [{"category": "program_name", "name": "Collection2"}],
            }
        ),
    }
    assert harvested_series == expected_series

0 comments on commit 05a7153

Please sign in to comment.