Skip to content

Commit

Permalink
feat: add preparations view
Browse files Browse the repository at this point in the history
This contains everything required for the preparations view, including the link to the specimen view and the associated tests. It also includes a rewrite of the key importer tests, which were slow to run and not precise enough to verify the behaviour we actually needed to test.
  • Loading branch information
jrdh committed Nov 8, 2023
1 parent b339b57 commit 3cc42f4
Show file tree
Hide file tree
Showing 8 changed files with 1,060 additions and 234 deletions.
82 changes: 82 additions & 0 deletions dataimporter/emu/views/preparation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from dataimporter.emu.views.utils import (
NO_PUBLISH,
DISALLOWED_STATUSES,
DEPARTMENT_COLLECTION_CODES,
INVALID_STATUS,
INVALID_DEPARTMENT,
INVALID_TYPE,
is_web_published,
is_valid_guid,
INVALID_GUID,
)
from dataimporter.emu.views.utils import emu_date
from dataimporter.model import SourceRecord
from dataimporter.view import View, FilterResult, SUCCESS_RESULT

INVALID_SUBDEPARTMENT = FilterResult(False, "Invalid subdepartment")


class PreparationView(View):
    """
    View over EMu records that represent preparations.

    Records accepted by is_member feed the preparation resource on the Data
    Portal; make_data shapes each accepted record into the published dict.
    """

    def is_member(self, record: SourceRecord) -> FilterResult:
        """
        Decide whether the given record belongs in the preparation resource.

        :param record: the record to check
        :return: a FilterResult describing whether the record is a member and,
                 if not, why it was rejected
        """
        record_type = record.get_first_value("ColRecordType", default="")
        if record_type.lower() != "preparation":
            return INVALID_TYPE

        if not is_web_published(record):
            return NO_PUBLISH

        if not is_valid_guid(record):
            return INVALID_GUID

        if record.get_first_value("SecRecordStatus") in DISALLOWED_STATUSES:
            return INVALID_STATUS

        if record.get_first_value("ColDepartment") not in DEPARTMENT_COLLECTION_CODES:
            return INVALID_DEPARTMENT

        if record.get_first_value("ColSubDepartment") != "Molecular Collections":
            return INVALID_SUBDEPARTMENT

        return SUCCESS_RESULT

    def make_data(self, record: SourceRecord) -> dict:
        """
        Project the record's raw EMu data into the dict presented on the Data
        Portal.

        :param record: the record to project
        :return: a dict containing the data for this record that should be
                 displayed on the Data Portal
        """
        # bind the accessors to locals for perf
        first = record.get_first_value
        all_values = record.get_all_values

        data = {
            "_id": record.id,
            "created": emu_date(first("AdmDateInserted"), first("AdmTimeInserted")),
            "modified": emu_date(first("AdmDateModified"), first("AdmTimeModified")),
            "project": all_values("NhmSecProjectName"),
        }
        # straight one-to-one copies from EMu field -> Portal field
        simple_copies = {
            "preparationNumber": "EntPreNumber",
            "preparationType": "EntPrePreparationKind",
            "mediumType": "EntPreStorageMedium",
            "preparationProcess": "EntPrePreparationMethod",
            "preparationContents": "EntPreContents",
            "preparationDate": "EntPreDate",
        }
        data.update(
            (portal_field, first(emu_field))
            for portal_field, emu_field in simple_copies.items()
        )
        return data
25 changes: 20 additions & 5 deletions dataimporter/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@
from dataimporter.emu.views.image import ImageView
from dataimporter.emu.views.indexlot import IndexLotView
from dataimporter.emu.views.mss import MSSView
from dataimporter.emu.views.preparation import PreparationView
from dataimporter.emu.views.specimen import SpecimenView
from dataimporter.emu.views.taxonomy import TaxonomyView
from dataimporter.ext.gbif import GBIFView, get_changed_records
from dataimporter.links import MediaLink, TaxonomyLink, GBIFLink
from dataimporter.links import (
MediaLink,
TaxonomyLink,
GBIFLink,
PreparationSpecimenLink,
)
from dataimporter.model import SourceRecord
from dataimporter.view import View, ViewLink

Expand Down Expand Up @@ -76,6 +82,7 @@ def __init__(self, config: Config):
artefact_view = ArtefactView(self.views_path / "artefact", ecatalogue_db)
indexlot_view = IndexLotView(self.views_path / "indexlot", ecatalogue_db)
specimen_view = SpecimenView(self.views_path / "specimen", ecatalogue_db)
prep_view = PreparationView(self.views_path / "preparation", ecatalogue_db)

# CREATE THE VIEW LINKS
# first artefact links
Expand Down Expand Up @@ -108,6 +115,11 @@ def __init__(self, config: Config):
self.links_path / "specimen_gbif", specimen_view, gbif_view
)

# next preparation view
preparation_specimen = PreparationSpecimenLink(
self.links_path / "preparation_specimen", prep_view, specimen_view
)

# SETUP STATE
# store all the dbs, view, and links in dicts for easy access via their names
self.dbs: Dict[str, DataDB] = {
Expand All @@ -123,6 +135,7 @@ def __init__(self, config: Config):
artefact_view,
indexlot_view,
specimen_view,
prep_view,
]
}
self.links: Dict[str, ViewLink] = {
Expand All @@ -134,6 +147,7 @@ def __init__(self, config: Config):
specimen_images,
specimen_taxonomy,
specimen_gbif,
preparation_specimen,
]
}

Expand All @@ -146,12 +160,13 @@ def __init__(self, config: Config):
"indexlot": SplitgillDatabase(config.indexlot_id, self.client),
"artefact": SplitgillDatabase(config.artefact_id, self.client),
"mss": SplitgillDatabase("mss", self.client),
"preparation": SplitgillDatabase(config.preparation_id, self.client),
}

# a database for each data db's redacted IDs to be stored in
self.redaction_database = RedactionDB(config.data_path / "redactions")

def _queue_changes(self, records: Iterable[SourceRecord], db_name: str):
def queue_changes(self, records: Iterable[SourceRecord], db_name: str):
"""
Update the records in the data DB with the given name. The views based on the DB
that is being updated will also be updated.
Expand Down Expand Up @@ -197,7 +212,7 @@ def queue_emu_changes(self):
# record refers to a potentially different table from which it is
# deleting a record
if dump.table != "eaudit":
self._queue_changes(dump.read(), dump.table)
self.queue_changes(dump.read(), dump.table)
else:
# wrap the dump stream in a filter to only allow through records we
# want to process
Expand All @@ -211,7 +226,7 @@ def queue_emu_changes(self):
):
# convert the raw audit records into delete records as we queue
# them
self._queue_changes(
self.queue_changes(
map(convert_eaudit_to_delete, records), table
)
# we've handled all the dumps from this date, update the last date stored on
Expand All @@ -223,7 +238,7 @@ def queue_gbif_changes(self):
Retrieve the latest GBIF records, check which ones have changed compared to the
ones stored in the gbif data DB, and then queue them into the GBIF view.
"""
self._queue_changes(
self.queue_changes(
get_changed_records(
self.dbs["gbif"], self.config.gbif_username, self.config.gbif_password
),
Expand Down
107 changes: 107 additions & 0 deletions dataimporter/links.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,110 @@ def clear_from_foreign(self):
Clears out the gbif (foreign) ID to occurrence ID map.
"""
self.gbif_id_map.clear()


class PreparationSpecimenLink(ViewLink):
    """
    A ViewLink joining each preparation record to the specimen voucher record
    it was created from.

    The mapping is one-to-one, with the single specimen ID sourced from the
    base prep record. When a base record is transformed, a handful of fields
    from the linked specimen (see MAPPED_SPECIMEN_FIELDS) are copied onto the
    prep data dict, essentially as a searching convenience.
    """

    # the EMu field on the prep records which links to the specimen voucher record
    SPECIMEN_ID_REF_FIELD = "EntPreSpecimenRef"
    # the Portal fields which are copied from the specimen to the prep data dict
    # TODO: missing CollEventDateVisitedFrom, CollEventName_tab, and kinda ColSite
    MAPPED_SPECIMEN_FIELDS = [
        "barcode",
        "scientificName",
        "order",
        "identifiedBy",
        # this is a ColSite substitute which uses sumPreciseLocation
        "locality",
        "decimalLatitude",
        "decimalLongitude",
    ]

    def __init__(self, path: Path, prep_view: View, specimen_view: View):
        """
        :param path: the path to store the ViewLink data in
        :param prep_view: the preparation view
        :param specimen_view: the specimen view
        """
        super().__init__(path.name, prep_view, specimen_view)
        self.path = path
        # one-to-one index mapping prep ID -> specimen ID
        self.id_map = Index(path / "id_map")

    def update_from_base(self, prep_records: List[SourceRecord]):
        """
        Pull the linked specimen ID out of each of the given prep records and
        store the (prep ID, specimen ID) pairs in the ID map.

        :param prep_records: the changed prep records
        """
        ref_field = PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD
        pairs = (
            (prep.id, linked_id)
            for prep in prep_records
            # skip preps with no (or an empty) specimen reference
            if (linked_id := prep.get_first_value(ref_field))
        )
        self.id_map.put_one_to_one(pairs)

    def update_from_foreign(self, specimen_records: List[SourceRecord]):
        """
        Propagate changes in the given specimen records to the base prep
        records linked to them.

        :param specimen_records: the updated specimen records
        """
        prep_ids = set()
        for specimen_record in specimen_records:
            prep_ids.update(self.id_map.reverse_get(specimen_record.id))

        if not prep_ids:
            return

        linked_preps = list(self.base_view.db.get_records(prep_ids))
        if linked_preps:
            # requeue the associated prep records so the base view refreshes them
            self.base_view.queue(linked_preps)

    def transform(self, prep_record: SourceRecord, data: dict):
        """
        Augment the given prep record's data dict with data from the linked
        voucher specimen, if one exists.

        :param prep_record: the prep record
        :param data: the data dict to update
        """
        voucher_id = prep_record.get_first_value(
            PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD
        )
        if not voucher_id:
            return

        specimen = self.foreign_view.get_and_transform(voucher_id)
        if specimen is None:
            return

        # from DwC
        data["associatedOccurrences"] = f"Voucher: {specimen.pop('occurrenceID')}"
        # not from DwC
        data["specimenID"] = specimen.pop("_id")
        for field in PreparationSpecimenLink.MAPPED_SPECIMEN_FIELDS:
            value = specimen.get(field)
            if value is not None:
                data[field] = value

    def clear_from_base(self):
        """
        Wipe the prep ID -> specimen ID map.
        """
        self.id_map.clear()
73 changes: 73 additions & 0 deletions tests/emu/views/test_preparation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from contextlib import closing
from pathlib import Path
from typing import List, Tuple

import pytest

from dataimporter.dbs import DataDB
from dataimporter.emu.views.utils import (
NO_PUBLISH,
INVALID_TYPE,
INVALID_GUID,
INVALID_STATUS,
INVALID_DEPARTMENT,
)
from dataimporter.model import SourceRecord
from dataimporter.view import FilterResult, SUCCESS_RESULT
from dataimporter.emu.views.preparation import PreparationView, INVALID_SUBDEPARTMENT
from tests.helpers.samples.preparation import (
SAMPLE_PREPARATION_DATA,
SAMPLE_PREPARATION_ID,
)


@pytest.fixture
def prep_view(tmp_path: Path) -> PreparationView:
    """
    Yield a PreparationView backed by a temporary DataDB, ensuring the view is
    closed once the test is done with it.
    """
    view = PreparationView(tmp_path / "prep_view", DataDB(tmp_path / "prep_data"))
    try:
        yield view
    finally:
        view.close()


# each scenario overrides exactly one field of the valid sample record to
# trigger a specific rejection; the final empty override checks that the
# untouched sample passes
is_member_scenarios: List[Tuple[dict, FilterResult]] = [
    ({"ColRecordType": "Specimen"}, INVALID_TYPE),
    ({"AdmPublishWebNoPasswordFlag": "n"}, NO_PUBLISH),
    ({"AdmGUIDPreferredValue": "not a valid guid!"}, INVALID_GUID),
    ({"SecRecordStatus": "INVALID"}, INVALID_STATUS),
    ({"ColDepartment": "DDI"}, INVALID_DEPARTMENT),
    ({"ColSubDepartment": "Informatics"}, INVALID_SUBDEPARTMENT),
    ({}, SUCCESS_RESULT),
]


@pytest.mark.parametrize("overrides, result", is_member_scenarios)
def test_is_member(overrides: dict, result: FilterResult, prep_view: PreparationView):
    """
    Check that each scenario's overridden sample record produces exactly the
    expected filter result.
    """
    modified_data = dict(SAMPLE_PREPARATION_DATA)
    modified_data.update(overrides)
    record = SourceRecord(SAMPLE_PREPARATION_ID, modified_data, "test")
    assert prep_view.is_member(record) == result


def test_transform_deleted(prep_view: PreparationView):
    """
    A record with no data counts as deleted and should transform to an empty
    dict.
    """
    deleted_record = SourceRecord(SAMPLE_PREPARATION_ID, {}, "test")
    assert deleted_record.is_deleted
    assert prep_view.transform(deleted_record) == {}


def test_make_data(prep_view: PreparationView):
    """
    make_data on the full sample record should produce exactly the expected
    Portal dict.
    """
    record = SourceRecord(SAMPLE_PREPARATION_ID, SAMPLE_PREPARATION_DATA, "test")

    expected = {
        "_id": record.id,
        "created": "2022-09-12T17:07:51+00:00",
        "modified": "2022-09-12T17:21:14+00:00",
        "project": "Darwin Tree of Life",
        "preparationNumber": "C9K02TWP_B2",
        "preparationType": "DNA Extract",
        "mediumType": None,
        "preparationProcess": None,
        "preparationContents": "**OTHER_SOMATIC_ANIMAL_TISSUE**",
        "preparationDate": "2022-05-09",
    }
    assert prep_view.make_data(record) == expected
Loading

0 comments on commit 3cc42f4

Please sign in to comment.