From fd7fd29dd5171f7cd88dbc078ec9932243f12fbc Mon Sep 17 00:00:00 2001 From: Rithvik Nishad Date: Fri, 6 Oct 2023 00:15:15 +0530 Subject: [PATCH 1/2] Refactor ICD11 Table; Cleanup in-memory search (#1636) * Refactor ICD11 Table * optimize memory usage * gracefully handle relation not existing * check table exists before loading from db * Apply suggestions from code review Co-authored-by: Aakash Singh * move fetch_data to commands * fix id type in in-memory * patch * merge migrations --------- Co-authored-by: Aakash Singh --- ...gnosis.py => load_icd11_diagnoses_data.py} | 83 ++++++++++++------- .../migrations/0388_icd11diagnosis.py | 57 +++++++++++++ .../migrations/0389_merge_20231005_2247.py | 12 +++ care/facility/models/__init__.py | 1 + care/facility/models/icd11_diagnosis.py | 33 ++++++++ care/facility/models/meta_icd11_diagnosis.py | 2 + care/facility/static_data/icd11.py | 48 ++++------- 7 files changed, 177 insertions(+), 59 deletions(-) rename care/facility/management/commands/{load_meta_icd11_diagnosis.py => load_icd11_diagnoses_data.py} (68%) create mode 100644 care/facility/migrations/0388_icd11diagnosis.py create mode 100644 care/facility/migrations/0389_merge_20231005_2247.py create mode 100644 care/facility/models/icd11_diagnosis.py diff --git a/care/facility/management/commands/load_meta_icd11_diagnosis.py b/care/facility/management/commands/load_icd11_diagnoses_data.py similarity index 68% rename from care/facility/management/commands/load_meta_icd11_diagnosis.py rename to care/facility/management/commands/load_icd11_diagnoses_data.py index 1ffae591b0..4ad383bba7 100644 --- a/care/facility/management/commands/load_meta_icd11_diagnosis.py +++ b/care/facility/management/commands/load_icd11_diagnoses_data.py @@ -1,17 +1,46 @@ +import json + from django.core.management import BaseCommand, CommandError -from care.facility.models.meta_icd11_diagnosis import MetaICD11Diagnosis -from care.facility.static_data.icd11 import fetch_data +from 
care.facility.models.icd11_diagnosis import ICD11Diagnosis + + +def fetch_data(): + with open("data/icd11.json", "r") as json_file: + return json.load(json_file) + + +ICD11_ID_SUFFIX_TO_INT = { + "mms": 1, + "other": 2, + "unspecified": 3, +} + + +def icd11_id_to_int(icd11_id): + """ + Maps ICD11 ID to an integer. + + Eg: + - http://id.who.int/icd/entity/594985340 -> 594985340 + - http://id.who.int/icd/entity/594985340/mms -> 594985340001 + - http://id.who.int/icd/entity/594985340/mms/unspecified -> 594985340003 + """ + entity_id = icd11_id.replace("http://id.who.int/icd/entity/", "") + if entity_id.isnumeric(): + return int(entity_id) + segments = entity_id.split("/") + return int(segments[0]) * 1e3 + ICD11_ID_SUFFIX_TO_INT[segments[-1]] class Command(BaseCommand): """ Management command to load ICD11 diagnoses to database. Not for production use. - Usage: python manage.py load_meta_icd11_diagnosis + Usage: python manage.py load_icd11_diagnoses_data """ - help = "Loads ICD11 data to a table in to database." 
+ help = "Loads ICD11 diagnoses data to database" data = [] roots_lookup = {} @@ -29,8 +58,9 @@ class Command(BaseCommand): """ CLASS_KIND_DB_KEYS = { - "block": "root_block", - "category": "root_category", + "chapter": "meta_chapter", + "block": "meta_root_block", + "category": "meta_root_category", } ICD11_GROUP_LABEL_PRETTY = { @@ -55,13 +85,12 @@ class Command(BaseCommand): "19 Certain conditions originating in the perinatal period": "Neonatology", "20 Developmental anomalies": "Developmental Anomalies", "21 Symptoms, signs or clinical findings, not elsewhere classified": "Others", - "22 Injury, poisoning or certain other consequences of external causes": "Injury, Poisoning ", + "22 Injury, poisoning or certain other consequences of external causes": "Injury, Poisoning", "23 External causes of morbidity or mortality": "External Causes of Injury", "24 Factors influencing health status or contact with health services": None, "25 Codes for special purposes": "Codes for special purposes", "26 Supplementary Chapter Traditional Medicine Conditions - Module I": None, - "V Supplementary section for functioning assessment": "Functioning assessment ", - "X Extension Codes": "NOT RELEVANT", + "V Supplementary section for functioning assessment": "Functioning assessment", } def find_roots(self, item): @@ -98,7 +127,7 @@ def my(x): ) def handle(self, *args, **options): - print("Loading ICD11 data to DB Table (meta_icd11_diagnosis)...") + print("Loading ICD11 diagnoses data to database...") try: self.data = fetch_data() @@ -110,29 +139,27 @@ def roots(item): result = { self.CLASS_KIND_DB_KEYS.get(k, k): v for k, v in roots.items() } - result["chapter_short"] = mapped - result["deleted"] = mapped is None + result["meta_chapter_short"] = mapped + result["meta_hidden"] = mapped is None return result - MetaICD11Diagnosis.objects.all().delete() - MetaICD11Diagnosis.objects.bulk_create( + ICD11Diagnosis.objects.all().delete() + ICD11Diagnosis.objects.bulk_create( [ - 
MetaICD11Diagnosis( - id=icd11_object["ID"], - _id=int(icd11_object["ID"].split("/")[-1]), - average_depth=icd11_object["averageDepth"], - is_adopted_child=icd11_object["isAdoptedChild"], - parent_id=icd11_object["parentId"], - class_kind=icd11_object["classKind"], - is_leaf=icd11_object["isLeaf"], - label=icd11_object["label"], - breadth_value=icd11_object["breadthValue"], - **roots(icd11_object), + ICD11Diagnosis( + id=icd11_id_to_int(obj["ID"]), + icd11_id=obj["ID"], + label=obj["label"], + class_kind=obj["classKind"], + is_leaf=obj["isLeaf"], + parent_id=obj["parentId"] and icd11_id_to_int(obj["parentId"]), + average_depth=obj["averageDepth"], + is_adopted_child=obj["isAdoptedChild"], + breadth_value=obj["breadthValue"], + **roots(obj), ) - for icd11_object in self.data - if icd11_object["ID"].split("/")[-1].isnumeric() + for obj in self.data ] ) - print("Done loading ICD11 data to database.") except Exception as e: raise CommandError(e) diff --git a/care/facility/migrations/0388_icd11diagnosis.py b/care/facility/migrations/0388_icd11diagnosis.py new file mode 100644 index 0000000000..97de51a8c9 --- /dev/null +++ b/care/facility/migrations/0388_icd11diagnosis.py @@ -0,0 +1,57 @@ +# Generated by Django 4.2.2 on 2023-09-25 13:00 + +import django.db.models.deletion +from django.core.management import call_command +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("facility", "0387_merge_20230911_2303"), + ] + + operations = [ + migrations.CreateModel( + name="ICD11Diagnosis", + fields=[ + ("id", models.BigIntegerField(primary_key=True, serialize=False)), + ("icd11_id", models.CharField(max_length=255, unique=True)), + ("label", models.CharField(max_length=255)), + ( + "class_kind", + models.CharField( + choices=[ + ("chapter", "Chapter"), + ("block", "Block"), + ("category", "Category"), + ], + max_length=255, + ), + ), + ("is_leaf", models.BooleanField()), + ("average_depth", models.IntegerField()), + 
("is_adopted_child", models.BooleanField()), + ( + "breadth_value", + models.DecimalField(decimal_places=22, max_digits=24), + ), + ("meta_hidden", models.BooleanField(default=False)), + ("meta_chapter", models.CharField(max_length=255)), + ("meta_chapter_short", models.CharField(max_length=255, null=True)), + ("meta_root_block", models.CharField(max_length=255, null=True)), + ("meta_root_category", models.CharField(max_length=255, null=True)), + ( + "parent", + models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.DO_NOTHING, + to="facility.icd11diagnosis", + ), + ), + ], + ), + migrations.RunPython( + lambda apps, schema_editor: call_command("load_icd11_diagnoses_data"), + reverse_code=migrations.RunPython.noop, + ), + ] diff --git a/care/facility/migrations/0389_merge_20231005_2247.py b/care/facility/migrations/0389_merge_20231005_2247.py new file mode 100644 index 0000000000..28de9d2966 --- /dev/null +++ b/care/facility/migrations/0389_merge_20231005_2247.py @@ -0,0 +1,12 @@ +# Generated by Django 4.2.5 on 2023-10-05 17:17 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("facility", "0388_goal_goalentry_goalproperty_goalpropertyentry"), + ("facility", "0388_icd11diagnosis"), + ] + + operations = [] diff --git a/care/facility/models/__init__.py b/care/facility/models/__init__.py index 2c4ef59373..92cf8dc70b 100644 --- a/care/facility/models/__init__.py +++ b/care/facility/models/__init__.py @@ -6,6 +6,7 @@ from .bed import * # noqa from .daily_round import * # noqa from .facility import * # noqa +from .icd11_diagnosis import * # noqa from .inventory import * # noqa from .meta_icd11_diagnosis import * # noqa from .patient import * # noqa diff --git a/care/facility/models/icd11_diagnosis.py b/care/facility/models/icd11_diagnosis.py new file mode 100644 index 0000000000..959340fe12 --- /dev/null +++ b/care/facility/models/icd11_diagnosis.py @@ -0,0 +1,33 @@ +from django.db import models + + +class 
ICD11ClassKind(models.TextChoices): + CHAPTER = "chapter" + BLOCK = "block" + CATEGORY = "category" + + +class ICD11Diagnosis(models.Model): + """ + Use ICDDiseases for in-memory search. + """ + + id = models.BigIntegerField(primary_key=True) + icd11_id = models.CharField(max_length=255, unique=True) + label = models.CharField(max_length=255) + class_kind = models.CharField(max_length=255, choices=ICD11ClassKind.choices) + is_leaf = models.BooleanField() + parent = models.ForeignKey("self", on_delete=models.DO_NOTHING, null=True) + average_depth = models.IntegerField() + is_adopted_child = models.BooleanField() + breadth_value = models.DecimalField(max_digits=24, decimal_places=22) + + # Meta fields + meta_hidden = models.BooleanField(default=False) + meta_chapter = models.CharField(max_length=255) + meta_chapter_short = models.CharField(max_length=255, null=True) + meta_root_block = models.CharField(max_length=255, null=True) + meta_root_category = models.CharField(max_length=255, null=True) + + def __str__(self) -> str: + return self.label diff --git a/care/facility/models/meta_icd11_diagnosis.py b/care/facility/models/meta_icd11_diagnosis.py index 4a2bb45da7..8d5a16bee0 100644 --- a/care/facility/models/meta_icd11_diagnosis.py +++ b/care/facility/models/meta_icd11_diagnosis.py @@ -4,6 +4,8 @@ class MetaICD11Diagnosis(models.Model): """ Not for production use. For Metabase purposes only. Do not build relations to this model. + + Deprecated in favor of ICD11Diagnosis. This table will be removed in the future. 
""" id = models.CharField(max_length=255, primary_key=True) diff --git a/care/facility/static_data/icd11.py b/care/facility/static_data/icd11.py index b535acd70c..dc987eae43 100644 --- a/care/facility/static_data/icd11.py +++ b/care/facility/static_data/icd11.py @@ -1,43 +1,29 @@ import contextlib -import json +from django.db import connection from littletable import Table +from care.facility.models.icd11_diagnosis import ICD11Diagnosis -def fetch_data(): - with open("data/icd11.json", "r") as json_file: - return json.load(json_file) - -def is_numeric(val): - if str(val).isnumeric(): - return val - return -1 +def fetch_from_db(): + # This is a hack to prevent the migration from failing when the table does not exist + all_tables = connection.introspection.table_names() + if "facility_icd11diagnosis" in all_tables: + return [ + { + "id": str(diagnosis["id"]), + "label": diagnosis["label"], + } + for diagnosis in ICD11Diagnosis.objects.filter(is_leaf=True).values( + "id", "label" + ) + ] + return [] ICDDiseases = Table("ICD11") -icd11_objects = fetch_data() -entity_id = "" -IGNORE_FIELDS = [ - "isLeaf", - "classKind", - "isAdoptedChild", - "averageDepth", - "breadthValue", - "Suggested", -] - -for icd11_object in icd11_objects: - for field in IGNORE_FIELDS: - icd11_object.pop(field, "") - icd11_object["id"] = icd11_object.pop("ID") - entity_id = icd11_object["id"].split("/")[-1] - icd11_object["id"] = is_numeric(entity_id) - if icd11_object["id"] == -1: - continue - if icd11_object["id"]: - ICDDiseases.insert(icd11_object) - +ICDDiseases.insert_many(fetch_from_db()) ICDDiseases.create_search_index("label") ICDDiseases.create_index("id", unique=True) From ab695cdde40b46041d176bd3fa268a45475a8d31 Mon Sep 17 00:00:00 2001 From: Rithvik Nishad Date: Fri, 6 Oct 2023 20:55:55 +0530 Subject: [PATCH 2/2] ICD11 patch: load all to in-memory; but search filter by `is_leaf=True` (#1661) * icd11: remove `is_leaf` filter for in-memory table * search api; filter by is_leaf=True --- 
care/facility/api/viewsets/icd.py | 3 ++- care/facility/static_data/icd11.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/care/facility/api/viewsets/icd.py b/care/facility/api/viewsets/icd.py index 2b5851d05c..56561b9b70 100644 --- a/care/facility/api/viewsets/icd.py +++ b/care/facility/api/viewsets/icd.py @@ -24,6 +24,7 @@ def list(self, request): if request.GET.get("query", False): query = request.GET.get("query") queryset = queryset.where( - label=queryset.re_match(r".*" + query + r".*", IGNORECASE) + label=queryset.re_match(r".*" + query + r".*", IGNORECASE), + is_leaf=True, ) # can accept regex from FE if needed. return Response(serailize_data(queryset[0:100])) diff --git a/care/facility/static_data/icd11.py b/care/facility/static_data/icd11.py index dc987eae43..ceb4aa48de 100644 --- a/care/facility/static_data/icd11.py +++ b/care/facility/static_data/icd11.py @@ -14,9 +14,10 @@ def fetch_from_db(): { "id": str(diagnosis["id"]), "label": diagnosis["label"], + "is_leaf": diagnosis["is_leaf"], } - for diagnosis in ICD11Diagnosis.objects.filter(is_leaf=True).values( - "id", "label" + for diagnosis in ICD11Diagnosis.objects.filter().values( + "id", "label", "is_leaf" ) ] return []