Skip to content

Commit

Permalink
Merge pull request #342 from smart-on-fhir/mikix/no-med-spam
Browse files Browse the repository at this point in the history
medicationrequest: don't spam console with "need --fhir-url" errors
  • Loading branch information
mikix authored Aug 28, 2024
2 parents 9d1dfe5 + 4605c81 commit 55db90e
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 13 deletions.
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# syntax=docker/dockerfile:1

FROM alpine/git as ms-tool-src
FROM alpine/git AS ms-tool-src
RUN git clone https://github.com/microsoft/Tools-for-Health-Data-Anonymization.git /app

FROM mcr.microsoft.com/dotnet/sdk:6.0 AS ms-tool
Expand All @@ -25,7 +25,7 @@ RUN --mount=type=cache,target=/root/.cache \
pip3 install /app/[tests]
RUN rm -r /app

ENV JAVA_HOME /opt/java/openjdk
ENV JAVA_HOME=/opt/java/openjdk

ENTRYPOINT ["cumulus-etl"]

Expand All @@ -42,6 +42,6 @@ RUN --mount=type=cache,target=/root/.cache \
pip3 install /app
RUN rm -r /app

ENV JAVA_HOME /opt/java/openjdk
ENV JAVA_HOME=/opt/java/openjdk

ENTRYPOINT ["cumulus-etl"]
28 changes: 20 additions & 8 deletions cumulus_etl/etl/tasks/basic_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ class MedicationRequestTask(tasks.EtlTask):
tags: ClassVar = {"cpu"}

# We may write to a second Medication table as we go.
# MedicationRequest can have inline medications via CodeableConcepts, or external Medication references.
# MedicationRequest can have inline medications via CodeableConcepts, or external Medication
# references.
# If external, we'll download them and stuff them in this output table.
# We do all this special business logic because Medication is a special, "reference" resource,
# and many EHRs don't let you simply bulk export them.
Expand All @@ -110,20 +111,27 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

# Keep a cache of medication IDs that we've already downloaded.
# If this ends up growing too large in practice, we can wipe it during a call to table_batch_cleanup().
# If this ends up growing too large in practice, we can wipe it during a call to
# table_batch_cleanup().
# But let's try initially with keeping it around for the whole task.
self.medication_ids = set()

# Track whether we already warned about downloading Medications, to avoid spamming.
self.warned_connection_error = False

def scrub_medication(self, medication: dict | None) -> bool:
"""Scrub incoming medication resources, returns False if it should be skipped"""
if not medication or not self.scrubber.scrub_resource(medication): # standard scrubbing
return False

# Normally the above is all we'd need to do.
# But this resource just came hot from the FHIR server, and we did not run the MS anonymizer on it.
# But this resource just came hot from the FHIR server, and we did not run the MS
# anonymizer on it.
# Since Medications are not patient-specific, we don't need the full MS treatment.
# But still, we should probably drop some bits that might more easily identify the *institution*.
# This is a poor-man's MS config tool (and a blocklist rather than allow-list, but it's a very simple resource)
# But still, we should probably drop some bits that might more easily identify the
# *institution*.
# This is a poor-man's MS config tool (and a blocklist rather than allow-list, but it's a
# very simple resource)

# *should* remove extensions at all layers, but this will catch 99% of them
medication.pop("extension", None)
Expand All @@ -141,16 +149,20 @@ async def fetch_medication(self, resource: dict) -> dict | None:

if not reference.startswith("#"):
# Don't duplicate medications we've already seen this run.
# This will still duplicate medications from previous runs, but avoiding that feels like more work than it's
# worth - just download em again and push em through (there might be updates to the resources, too!)
# This will still duplicate medications from previous runs, but avoiding that feels
# like more work than it's worth - just download em again and push em through (there
# might be updates to the resources, too!)
if reference in self.medication_ids:
return None
self.medication_ids.add(reference)

try:
medication = await fhir.download_reference(self.task_config.client, reference)
except Exception as exc:
logging.warning("Could not download Medication reference: %s", exc)
if not self.warned_connection_error:
logging.warning("Could not download Medication reference: %s", exc)
self.warned_connection_error = True

self.summaries[1].had_errors = True

if self.task_config.dir_errors:
Expand Down
23 changes: 21 additions & 2 deletions tests/etl/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,8 @@ async def test_external_medications_with_error(self, mock_download):
ValueError("bad haircut"),
]

await basic_tasks.MedicationRequestTask(self.job_config, self.scrubber).run()
with self.assertLogs(level="WARNING") as logs:
await basic_tasks.MedicationRequestTask(self.job_config, self.scrubber).run()

med_format = self.format
med_req_format = self.format2
Expand All @@ -451,6 +452,11 @@ async def test_external_medications_with_error(self, mock_download):
batch = med_req_format.write_records.call_args[0][0]
self.assertEqual(3, len(batch.rows))

# Confirm we only logged about the first problem (to avoid spamming console)
self.assertEqual(
logs.output, ["WARNING:root:Could not download Medication reference: bad hostname"]
)

# Confirm we still wrote out the medication for B
self.assertEqual(1, med_format.write_records.call_count)
batch = med_format.write_records.call_args[0][0]
Expand Down Expand Up @@ -520,6 +526,11 @@ async def test_external_medications_skips_unknown_modifiers(self, mock_download)
self.make_json(
"MedicationRequest", "B", medicationReference={"reference": "Medication/good"}
)
self.make_json(
"MedicationRequest",
"Skipped", # this will be ignored due to uknown modifier in the MedReq itself
modifierExtension=[{"url": "unrecognized"}],
)
mock_download.side_effect = [
{
"resourceType": "Medication",
Expand All @@ -534,7 +545,15 @@ async def test_external_medications_skips_unknown_modifiers(self, mock_download)

await basic_tasks.MedicationRequestTask(self.job_config, self.scrubber).run()

med_format = self.format
med_format = self.format # Medication
medreq_format = self.format2 # MedicationRequest

self.assertEqual(1, medreq_format.write_records.call_count)
batch = medreq_format.write_records.call_args[0][0]
self.assertEqual( # no "Skipped"
{self.codebook.db.resource_hash("A"), self.codebook.db.resource_hash("B")},
{row["id"] for row in batch.rows},
)

self.assertEqual(1, med_format.write_records.call_count)
batch = med_format.write_records.call_args[0][0]
Expand Down

0 comments on commit 55db90e

Please sign in to comment.