From 81342fe8a1f660097b7bc9d7f4569940f748abea Mon Sep 17 00:00:00 2001
From: Dan LaManna
Date: Wed, 15 May 2024 16:28:15 -0400
Subject: [PATCH] Enforce that RCM cases belong to at most one lesion

---
Notes (not part of the commit message):

  * Adds check_rcm_case_at_most_one_lesion, an "after"-mode model
    validator that walks self.items and collects, per rcm_case_id, the
    set of lesion_id values seen. Rows whose rcm_case_id or lesion_id is
    falsy (e.g. the empty string) are skipped.
  * If any RCM case maps to more than one distinct lesion, the validator
    raises PydanticCustomError of type "one_rcm_case_multiple_lesions";
    at most five offending case ids are attached under "examples".
  * Tests cover one conflicting case (expects exactly one validation
    error) and blank rcm_case_id values (expects no error).
  * NOTE(review): line breaks and indentation in this copy were restored
    from a whitespace-collapsed paste; confirm the context lines against
    the target tree before applying.

 isic_metadata/metadata.py | 20 ++++++++++++++++++++
 tests/test_batch.py       | 25 +++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/isic_metadata/metadata.py b/isic_metadata/metadata.py
index 0e1e6e5..0dd03b3 100644
--- a/isic_metadata/metadata.py
+++ b/isic_metadata/metadata.py
@@ -164,6 +164,26 @@ def check_rcm_at_most_one_macroscopic_image(self) -> MetadataBatch:
 
         return self
 
+    @model_validator(mode="after")
+    def check_rcm_case_at_most_one_lesion(self) -> MetadataBatch:
+        rcm_case_to_lesions: dict[str, set[str]] = defaultdict(set)
+
+        for item in self.items:
+            if item.rcm_case_id and item.lesion_id:
+                rcm_case_to_lesions[item.rcm_case_id].add(item.lesion_id)
+
+        bad_rcm_cases = [
+            rcm_case for rcm_case in rcm_case_to_lesions if len(rcm_case_to_lesions[rcm_case]) > 1
+        ]
+        if bad_rcm_cases:
+            raise PydanticCustomError(
+                "one_rcm_case_multiple_lesions",
+                "One or more RCM cases belong to multiple lesions.",
+                {"examples": bad_rcm_cases[:5]},
+            )
+
+        return self
+
 
 class MetadataRow(BaseModel):
     model_config = ConfigDict(
diff --git a/tests/test_batch.py b/tests/test_batch.py
index f6337d1..466c9eb 100644
--- a/tests/test_batch.py
+++ b/tests/test_batch.py
@@ -51,3 +51,28 @@ def test_rcm_case_has_at_most_one_macroscopic_image():
                 MetadataRow(image_type="RCM: macroscopic", rcm_case_id="bar"),
             ]
         )
+
+
+def test_rcm_cases_belong_to_same_lesion():
+    with pytest.raises(ValidationError) as excinfo:
+        MetadataBatch(
+            items=[
+                MetadataRow(
+                    rcm_case_id="foo", lesion_id="foolesion", _ignore_rcm_model_checks=True
+                ),
+                MetadataRow(
+                    rcm_case_id="foo", lesion_id="barlesion", _ignore_rcm_model_checks=True
+                ),
+            ]
+        )
+    assert len(excinfo.value.errors()) == 1
+    assert "belong to multiple lesions" in excinfo.value.errors()[0]["msg"]
+
+
+def test_blank_rcm_cases_dont_belong_to_same_lesion():
+    MetadataBatch(
+        items=[
+            MetadataRow(rcm_case_id="", lesion_id="foolesion"),
+            MetadataRow(rcm_case_id="", lesion_id="barlesion"),
+        ]
+    )