Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TCIA Adapter #121

Merged
merged 20 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions src/mds/agg_mds/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1581,6 +1581,105 @@ def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]:
return results


class TCIAAdapter(RemoteMetadataAdapter):
    """
    Simple adapter for TCIA (cancerimagingarchive.net)
    """

    @retry(
        stop=stop_after_attempt(5),
        retry=retry_if_exception_type(httpx.TimeoutException),
        wait=wait_random_exponential(multiplier=1, max=10),
    )
    def getRemoteDataAsJson(self, **kwargs) -> Dict:
        """
        Fetch the JSON document served at ``mds_url``.

        :param kwargs: only ``mds_url`` is read here.
        :return: ``{"results": <decoded JSON body>}`` on success. ``"results"``
            stays an empty list when ``mds_url`` is missing or when a
            non-timeout error occurs (the error is logged, not raised).
        :raises httpx.TimeoutException: logged and re-raised so that the
            ``retry`` decorator above, which is configured for this exception
            type, can attempt the request again.
        """
        results = {"results": []}

        mds_url = kwargs.get("mds_url", None)
        if mds_url is None:
            return results

        try:
            response = httpx.get(mds_url)
            response.raise_for_status()

            response_data = response.json()
            results["results"] = response_data

        except httpx.TimeoutException:
            logger.error(f"An timeout error occurred while requesting {mds_url}.")
            raise
        except httpx.HTTPError as exc:
            # Per the httpx exception docs only HTTPStatusError carries a
            # ``response``; reading it unconditionally on other HTTPError
            # subclasses would raise inside this handler and mask the
            # original failure.
            failed_response = getattr(exc, "response", None)
            status_code = (
                failed_response.status_code if failed_response is not None else ""
            )
            # ``request`` may likewise be unset on the exception; fall back to
            # the URL that was asked for.
            try:
                requested_url = exc.request.url
            except (AttributeError, RuntimeError):
                requested_url = mds_url
            logger.error(
                f"An HTTP error {status_code} occurred while requesting {requested_url}. Returning {len(results['results'])} results"
            )
        except Exception as exc:
            logger.error(
                f"An error occurred while requesting {mds_url} {exc}. Returning {len(results['results'])} results."
            )

        return results

    @staticmethod
    def addGen3ExpectedFields(
        item, mappings, keepOriginalFields, globalFieldFilters, schema
    ):
        """
        Map item fields to gen3 normalized fields
        using the mapping and adding the location

        :param item: one record of the remote response.
        :param mappings: field mapping handed to
            ``RemoteMetadataAdapter.mapFields``; when ``None`` the item is
            returned untouched.
        :param keepOriginalFields: when truthy, the mapped fields are merged
            into ``item`` itself (the caller's object is updated in place);
            otherwise only the mapped fields are returned.
        :param globalFieldFilters: passed through to ``mapFields``.
        :param schema: passed through to ``mapFields``.
        :return: the normalized record.
        """
        results = item
        if mappings is not None:
            mapped_fields = RemoteMetadataAdapter.mapFields(
                item, mappings, globalFieldFilters, schema
            )
            if keepOriginalFields:
                results.update(mapped_fields)
            else:
                results = mapped_fields

        return results

    def normalizeToGen3MDSFields(self, data, **kwargs) -> Dict[str, Any]:
        """
        Iterates over the response.

        Each entry of ``data["results"]`` is normalized with
        ``addGen3ExpectedFields``, then given a generated ``description`` and
        a single ``program_name`` tag.

        :param data: dictionary holding the records under ``"results"``.
        :param kwargs: reads ``mappings``, ``keepOriginalFields``,
            ``globalFieldFilters`` and ``schema``.
        :return: dictionary keyed by each record's ``_unique_id`` whose values
            are ``{"_guid_type": "discovery_metadata", "gen3_discovery": record}``.
        :raises KeyError: if a normalized record lacks ``program_name`` or
            ``_unique_id``.
        """
        mappings = kwargs.get("mappings", None)
        keepOriginalFields = kwargs.get("keepOriginalFields", False)
        globalFieldFilters = kwargs.get("globalFieldFilters", [])
        schema = kwargs.get("schema", {})

        results = {}
        for item in data["results"]:
            normalized_item = TCIAAdapter.addGen3ExpectedFields(
                item,
                mappings,
                keepOriginalFields,
                globalFieldFilters,
                schema,
            )

            normalized_item[
                "description"
            ] = f"TCIA data from collection: {normalized_item['program_name']}."

            # Falsy tag values (None, empty string, ...) are emitted as "".
            normalized_item["tags"] = [
                {
                    "name": normalized_item[tag] or "",
                    "category": tag,
                }
                for tag in ["program_name"]
            ]

            results[normalized_item["_unique_id"]] = {
                "_guid_type": "discovery_metadata",
                "gen3_discovery": normalized_item,
            }

        return results


def gather_metadata(
gather,
mds_url,
Expand Down Expand Up @@ -1627,6 +1726,7 @@ def gather_metadata(
"gdc": GDCAdapter,
"cidc": CIDCAdapter,
"pdc": PDCAdapter,
"tcia": TCIAAdapter,
}


Expand Down
94 changes: 94 additions & 0 deletions tests/test_agg_mds_tcia_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import respx
import httpx

from mds.agg_mds.adapters import get_metadata


@respx.mock
def test_get_metadata_tcia():
tcia_response = """

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This TCIA response only tests subject-level metadata. For study-level metadata coming from this URL, I think the mapping should look like this:

{
  "_guid_type": "TCIA_promethueus_study_metadata",
  "gen3_discovery": {
    "tags": [
      {
           "name": "TCIA",
           "category": "Data Source"
      }
    ],
    "authz": "",
    "_unique_id": "path:SeriesInstanceUID",
    "commons": "",
    "data_source": "TCIA",
    "apollo_id": "path:PatientID",
    "short_name": "path:ProtocolName",
    "study_description" : "path:SeriesDescription",
    "program_name": "path:Collection",
    "subject_id": "path:StudyInstanceUID",
    "series_number" : "path:SeriesNumber",
    "image_count": "path:ImageCount",
    "study_url": "path:CollectionURI",
  }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added the part for study-level to the tests.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding that

[
{
"StudyInstanceUID": "study_id_1",
"StudyDate": "",
"StudyDescription": "Collection One.",
"PatientAge": "",
"PatientID": "",
"PatientName": "",
"PatientSex": "",
"EthnicGroup": "",
"Collection": "Collection1",
"SeriesCount": 1,
"LongitudinalTemporalEventType": "",
"LongitudinalTemporalOffsetFromEvent": 0
},
{
"StudyInstanceUID": "study_id_2",
"StudyDate": "",
"StudyDescription": "Collection Two.",
"PatientAge": "",
"PatientID": "",
"PatientName": "",
"PatientSex": "",
"EthnicGroup": "",
"Collection": "Collection2",
"SeriesCount": 2,
"LongitudinalTemporalEventType": "",
"LongitudinalTemporalOffsetFromEvent": 1
}
]
"""

field_mappings = {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need PatientID, EthnicGroup, PatientSex, and SeriesCount in the final fields.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are just tests. That said, I have added the fields for clarification.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding the fields. I had requested it to align with CTDS guidelines, but as long as it works reliably, I’m okay to proceed.

"_unique_id": "path:StudyInstanceUID",
"commons": "TCIA",
"study_title": "path:StudyDescription",
"program_name": "path:Collection",
"description": "",
"tags": [],
}

respx.get("http://test/ok").mock(side_effect=httpx.HTTPError)
assert (
get_metadata("tcia", "http://test/ok", filters=None, mappings=field_mappings)
== {}
)

respx.get("http://test/ok").mock(side_effect=Exception)
assert (
get_metadata("tcia", "http://test/ok", filters=None, mappings=field_mappings)
== {}
)

respx.get(
"http://test/ok",
).mock(return_value=httpx.Response(status_code=200, content=tcia_response))

filters = {"size": 5}

assert get_metadata(
"tcia", "http://test/ok", filters=filters, mappings=field_mappings
) == {
"study_id_1": {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some level, this is how the subject metadata coming out of the adapter should look:

{
  "_guid_type": "TCIA_promethueus_subject_metadata",
  "gen3_discovery": {
    "tags": [
      {
           "name": "TCIA",
           "category": "Data Source"
      },
      {
           "name": "path:PatientSex",
           "category": "gender"
      },
      {
          "name": "path:EthnicGroup",
          "category": "race"
      }
    ],
    "authz": "",
    "gender": "path:PatientSex",
    "commons": "",
    "data_source": "TCIA",
    "apollo_id": "path:PatientID",
    "_unique_id": "path:StudyInstanceUID",
    "subject_id": "path:StudyInstanceUID",
    "year_of_birth" :  datetime.datetime.now().year - path:PatientAge.replace("Y" , ""), (convert age to year of birth)
    "gender" :  "path:PatientSex",
    "race" :  "path:EthnicGroup",
    "_series_count" : "path:SeriesCount",
    "study_title": "path:StudyDescription",
    "program_name": "path:Collection",
  }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are just tests. From what I can see, they have the same structure as what you provided.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Appreciate that. My request was to meet the testing standards, but given the time crunch, if it works as required, I’m fine moving forward.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am as well.

"_guid_type": "discovery_metadata",
"gen3_discovery": {
"_unique_id": "study_id_1",
"commons": "TCIA",
"description": "TCIA data from collection: Collection1.",
"program_name": "Collection1",
"study_title": "Collection One.",
"tags": [{"category": "program_name", "name": "Collection1"}],
},
},
"study_id_2": {
"_guid_type": "discovery_metadata",
"gen3_discovery": {
"_unique_id": "study_id_2",
"commons": "TCIA",
"description": "TCIA data from collection: Collection2.",
"program_name": "Collection2",
"study_title": "Collection Two.",
"tags": [{"category": "program_name", "name": "Collection2"}],
},
},
}
Loading