Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TCIA Adapter #121

Merged
merged 20 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions src/mds/agg_mds/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1581,6 +1581,105 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]:
return results


class TCIAAdapter(RemoteMetadataAdapter):
    """
    Simple adapter for TCIA (cancerimagingarchive.net)

    Fetches a JSON document from the configured URL, maps every returned
    record onto Gen3 discovery fields and returns the records keyed by their
    ``_unique_id``.
    """

    @retry(
        stop=stop_after_attempt(5),
        retry=retry_if_exception_type(httpx.TimeoutException),
        wait=wait_random_exponential(multiplier=1, max=10),
    )
    def getRemoteDataAsJson(self, **kwargs) -> Dict:
        """
        Request ``mds_url`` and return the decoded JSON payload.

        :param kwargs: only ``mds_url`` is read; when it is absent or None
            no request is made.
        :return: ``{"results": <decoded JSON>}`` on success, or
            ``{"results": []}`` when no URL was given or the request failed
            with anything other than a timeout.
        :raises httpx.TimeoutException: re-raised after logging so that the
            ``retry`` decorator can re-invoke the method.
        """
        results = {"results": []}

        mds_url = kwargs.get("mds_url", None)
        if mds_url is None:
            return results

        try:
            response = httpx.get(mds_url)
            response.raise_for_status()

            response_data = response.json()
            results["results"] = response_data

        except httpx.TimeoutException:
            logger.error(f"An timeout error occurred while requesting {mds_url}.")
            # Propagate: the decorator only retries on this exception type.
            raise
        except httpx.HTTPError as exc:
            # Only httpx.HTTPStatusError carries a ``response`` attribute, and
            # ``exc.request`` raises RuntimeError when no request is attached.
            # Read both defensively so that transport errors (connection
            # refused, DNS failure, ...) are logged instead of raising a
            # secondary exception from inside this handler.
            failed_response = getattr(exc, "response", None)
            status_code = (
                failed_response.status_code if failed_response is not None else ""
            )
            logger.error(
                f"An HTTP error {status_code} occurred while requesting {mds_url}. Returning {len(results['results'])} results"
            )
        except Exception as exc:
            # Best effort: any other failure (e.g. a body that is not valid
            # JSON) is logged and an empty result set is returned.
            logger.error(
                f"An error occurred while requesting {mds_url} {exc}. Returning {len(results['results'])} results."
            )

        return results

    @staticmethod
    def addGen3ExpectedFields(
        item, mappings, keepOriginalFields, globalFieldFilters, schema
    ):
        """
        Map item fields to gen3 normalized fields using the mapping.

        :param item: one raw record from the remote response (a dict).
        :param mappings: field mapping passed to
            ``RemoteMetadataAdapter.mapFields``; when None the record is
            returned unmapped.
        :param keepOriginalFields: when truthy the mapped fields are merged
            on top of the original fields, otherwise only the mapped fields
            are returned.
        :param globalFieldFilters: forwarded to ``mapFields``.
        :param schema: forwarded to ``mapFields``.
        :return: a dict with the resulting fields. The input ``item`` is never
            modified.
        """
        # Shallow copy so that neither the update below nor later edits made
        # by the caller leak back into the raw response record.
        results = dict(item)
        if mappings is not None:
            mapped_fields = RemoteMetadataAdapter.mapFields(
                item, mappings, globalFieldFilters, schema
            )
            if keepOriginalFields:
                results.update(mapped_fields)
            else:
                results = mapped_fields

        return results

    def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]:
        """
        Convert every record in ``data["results"]`` to a discovery entry.

        :param data: dict with a ``results`` iterable of raw records, as
            returned by ``getRemoteDataAsJson``.
        :param kwargs: reads ``mappings`` (default None),
            ``keepOriginalFields`` (default False), ``globalFieldFilters``
            (default []) and ``schema`` (default {}).
        :return: dict keyed by each record's ``_unique_id``; every value is
            ``{"_guid_type": "discovery_metadata", "gen3_discovery": record}``.
        :raises KeyError: if a normalized record has no ``_unique_id``.
        """
        mappings = kwargs.get("mappings", None)
        keepOriginalFields = kwargs.get("keepOriginalFields", False)
        globalFieldFilters = kwargs.get("globalFieldFilters", [])
        schema = kwargs.get("schema", {})

        results = {}
        for item in data["results"]:
            normalized_item = TCIAAdapter.addGen3ExpectedFields(
                item,
                mappings,
                keepOriginalFields,
                globalFieldFilters,
                schema,
            )

            # ``.get`` keeps a record without a collection name from aborting
            # the whole run; the value is rendered as-is in the description.
            program_name = normalized_item.get("program_name")
            normalized_item[
                "description"
            ] = f"TCIA data from collection: {program_name}."

            # One tag per listed field; falsy values become an empty name.
            normalized_item["tags"] = [
                {
                    "name": normalized_item.get(tag) or "",
                    "category": tag,
                }
                for tag in ["program_name"]
            ]

            results[normalized_item["_unique_id"]] = {
                "_guid_type": "discovery_metadata",
                "gen3_discovery": normalized_item,
            }

        return results


def gather_metadata(
gather,
mds_url,
Expand Down Expand Up @@ -1627,6 +1726,7 @@ def gather_metadata(
"gdc": GDCAdapter,
"cidc": CIDCAdapter,
"pdc": PDCAdapter,
"tcia": TCIAAdapter,
}


Expand Down
197 changes: 197 additions & 0 deletions tests/test_agg_mds_tcia_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import respx
import httpx

from mds.agg_mds.adapters import get_metadata


@respx.mock
def test_get_metadata_tcia():
tcia_response_study = """
[
{
"StudyInstanceUID": "study_id_1",
"StudyDate": "",
"StudyDescription": "Collection One.",
"PatientAge": "",
"PatientID": "",
"PatientName": "",
"PatientSex": "",
"EthnicGroup": "",
"Collection": "Collection1",
"SeriesCount": 1,
"LongitudinalTemporalEventType": "",
"LongitudinalTemporalOffsetFromEvent": 0
},
{
"StudyInstanceUID": "study_id_2",
"StudyDate": "",
"StudyDescription": "Collection Two.",
"PatientAge": "",
"PatientID": "",
"PatientName": "",
"PatientSex": "",
"EthnicGroup": "",
"Collection": "Collection2",
"SeriesCount": 2,
"LongitudinalTemporalEventType": "",
"LongitudinalTemporalOffsetFromEvent": 1
}
]
"""

field_mappings_study = {
"_unique_id": "path:StudyInstanceUID",
"commons": "TCIA",
"study_title": "path:StudyDescription",
"program_name": "path:Collection",
"description": "",
"tags": [],
}

tcia_response_series = """
[
{
"SeriesInstanceUID": "series_id_1",
"StudyInstanceUID": "study_id_1",
"Modality": "A",
"ProtocolName": "",
"SeriesDate": "1970-01-01 00:00:00.0",
"SeriesDescription": "",
"SeriesNumber": 1,
"Collection": "Collection1",
"PatientID": "",
"Manufacturer": "",
"ManufacturerModelName": "",
"ImageCount": 10,
"TimeStamp": "1970-01-01 00:00:00.0",
"LicenseName": "",
"LicenseURI": "",
"CollectionURI": "",
"FileSize": 100,
"DateReleased": "1970-01-01 00:00:00.0",
"StudyDesc": "",
"StudyDate": "1970-01-01 00:00:00.0",
"ThirdPartyAnalysis": ""
},
{
"SeriesInstanceUID": "series_id_2",
"StudyInstanceUID": "study_id_2",
"Modality": "B",
"ProtocolName": "",
"SeriesDate": "1970-01-01 10:00:00.0",
"SeriesDescription": "",
"BodyPartExamined": "",
"SeriesNumber": 2,
"Collection": "Collection2",
"PatientID": "",
"Manufacturer": "",
"ManufacturerModelName": "",
"SoftwareVersions": "",
"ImageCount": 20,
"TimeStamp": "1970-01-01 10:00:00.0",
"LicenseName": "",
"LicenseURI": "",
"CollectionURI": "",
"FileSize": 200,
"DateReleased": "1970-01-01 10:00:00.0",
"StudyDesc": "",
"StudyDate": "1970-01-01 10:00:00.0",
"ThirdPartyAnalysis": ""
}
]
"""

field_mappings_series = {
"_unique_id": "path:SeriesInstanceUID",
"study_id": "path:StudyInstanceUID",
"commons": "TCIA",
"study_title": "path:SeriesDescription",
"program_name": "path:Collection",
"image_count": "path:ImageCount",
"description": "",
"tags": [],
}

respx.get("http://test/ok").mock(side_effect=httpx.HTTPError)
assert (
get_metadata(
"tcia", "http://test/ok", filters=None, mappings=field_mappings_study
)
== {}
)

respx.get("http://test/ok").mock(side_effect=Exception)
assert (
get_metadata(
"tcia", "http://test/ok", filters=None, mappings=field_mappings_study
)
== {}
)

respx.get(
"http://test/ok",
).mock(return_value=httpx.Response(status_code=200, content=tcia_response_study))

filters = {"size": 5}

assert get_metadata(
"tcia", "http://test/ok", filters=filters, mappings=field_mappings_study
) == {
"study_id_1": {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some level, this is how the subject metadata coming out of the adapter should look:

{
  "_guid_type": "TCIA_promethueus_subject_metadata",
  "gen3_discovery": {
    "tags": [
      {
           "name": "TCIA",
           "category": "Data Source"
      },
      {
           "name": "path:PatientSex",
           "category": "gender"
      },
      {
          "name": "path:EthnicGroup",
          "category": "race"
      }
    ],
    "authz": "",
    "gender": "path:PatientSex",
    "commons": "",
    "data_source": "TCIA",
    "apollo_id": "path:PatientID",
    "_unique_id": "path:StudyInstanceUID",
    "subject_id": "path:StudyInstanceUID",
    "year_of_birth" :  datetime.datetime.now().year - path:PatientAge.replace("Y" , ""), (convert age to year of birth)
    "gender" :  "path:PatientSex",
    "race" :  "path:EthnicGroup",
    "_series_count" : "path:SeriesCount",
    "study_title": "path:StudyDescription",
    "program_name": "path:Collection",
  }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are just tests. From what I can see, they have the same structure as what you provided.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Appreciate that. My request was to meet the testing standards, but given the time crunch, if it works as required, I’m fine moving forward.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am as well.

"_guid_type": "discovery_metadata",
"gen3_discovery": {
"_unique_id": "study_id_1",
"commons": "TCIA",
"description": "TCIA data from collection: Collection1.",
"program_name": "Collection1",
"study_title": "Collection One.",
"tags": [{"category": "program_name", "name": "Collection1"}],
},
},
"study_id_2": {
"_guid_type": "discovery_metadata",
"gen3_discovery": {
"_unique_id": "study_id_2",
"commons": "TCIA",
"description": "TCIA data from collection: Collection2.",
"program_name": "Collection2",
"study_title": "Collection Two.",
"tags": [{"category": "program_name", "name": "Collection2"}],
},
},
}

respx.get(
"http://test/ok",
).mock(return_value=httpx.Response(status_code=200, content=tcia_response_series))

assert get_metadata(
"tcia", "http://test/ok", filters=filters, mappings=field_mappings_series
) == {
"series_id_1": {
"_guid_type": "discovery_metadata",
"gen3_discovery": {
"_unique_id": "series_id_1",
"study_id": "study_id_1",
"commons": "TCIA",
"study_title": "",
"program_name": "Collection1",
"image_count": 10,
"description": "TCIA data from collection: Collection1.",
"tags": [{"category": "program_name", "name": "Collection1"}],
},
},
"series_id_2": {
"_guid_type": "discovery_metadata",
"gen3_discovery": {
"_unique_id": "series_id_2",
"study_id": "study_id_2",
"commons": "TCIA",
"study_title": "",
"program_name": "Collection2",
"image_count": 20,
"description": "TCIA data from collection: Collection2.",
"tags": [{"category": "program_name", "name": "Collection2"}],
},
},
}
Loading