Skip to content

Commit

Permalink
feat: add preparations view
Browse files Browse the repository at this point in the history
This contains everything required for the preparations view, including the link to the specimen view and the associated tests. It also includes a rewrite of the key importer tests, which were slow to run and not precise enough to verify the behaviour we actually needed to test.
  • Loading branch information
jrdh committed Nov 8, 2023
1 parent b339b57 commit 3cc42f4
Show file tree
Hide file tree
Showing 8 changed files with 1,060 additions and 234 deletions.
82 changes: 82 additions & 0 deletions dataimporter/emu/views/preparation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from dataimporter.emu.views.utils import (
NO_PUBLISH,
DISALLOWED_STATUSES,
DEPARTMENT_COLLECTION_CODES,
INVALID_STATUS,
INVALID_DEPARTMENT,
INVALID_TYPE,
is_web_published,
is_valid_guid,
INVALID_GUID,
)
from dataimporter.emu.views.utils import emu_date
from dataimporter.model import SourceRecord
from dataimporter.view import View, FilterResult, SUCCESS_RESULT

INVALID_SUBDEPARTMENT = FilterResult(False, "Invalid subdepartment")


class PreparationView(View):
    """
    View over EMu records that represent preparations.

    Records accepted by is_member feed the preparation resource on the Data
    Portal; make_data shapes each accepted record into the published dict.
    """

    def is_member(self, record: SourceRecord) -> FilterResult:
        """
        Decide whether the given record belongs in the preparation resource.

        :param record: the record to check
        :return: a FilterResult describing whether the record is a member and,
                 if not, why it was rejected
        """
        record_type = record.get_first_value("ColRecordType", default="")
        if record_type.lower() != "preparation":
            return INVALID_TYPE

        if not is_web_published(record):
            return NO_PUBLISH

        if not is_valid_guid(record):
            return INVALID_GUID

        if record.get_first_value("SecRecordStatus") in DISALLOWED_STATUSES:
            return INVALID_STATUS

        if record.get_first_value("ColDepartment") not in DEPARTMENT_COLLECTION_CODES:
            return INVALID_DEPARTMENT

        if record.get_first_value("ColSubDepartment") != "Molecular Collections":
            return INVALID_SUBDEPARTMENT

        return SUCCESS_RESULT

    def make_data(self, record: SourceRecord) -> dict:
        """
        Project the record's raw EMu data into the dict presented on the Data
        Portal.

        :param record: the record to project
        :return: a dict containing the data for this record that should be
                 displayed on the Data Portal
        """
        # bind the accessors to locals for perf
        first = record.get_first_value
        all_values = record.get_all_values

        data = {
            "_id": record.id,
            "created": emu_date(first("AdmDateInserted"), first("AdmTimeInserted")),
            "modified": emu_date(first("AdmDateModified"), first("AdmTimeModified")),
            "project": all_values("NhmSecProjectName"),
        }
        # straight one-to-one copies from EMu field -> Portal field
        simple_copies = {
            "preparationNumber": "EntPreNumber",
            "preparationType": "EntPrePreparationKind",
            "mediumType": "EntPreStorageMedium",
            "preparationProcess": "EntPrePreparationMethod",
            "preparationContents": "EntPreContents",
            "preparationDate": "EntPreDate",
        }
        data.update(
            (portal_field, first(emu_field))
            for portal_field, emu_field in simple_copies.items()
        )
        return data
25 changes: 20 additions & 5 deletions dataimporter/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@
from dataimporter.emu.views.image import ImageView
from dataimporter.emu.views.indexlot import IndexLotView
from dataimporter.emu.views.mss import MSSView
from dataimporter.emu.views.preparation import PreparationView
from dataimporter.emu.views.specimen import SpecimenView
from dataimporter.emu.views.taxonomy import TaxonomyView
from dataimporter.ext.gbif import GBIFView, get_changed_records
from dataimporter.links import MediaLink, TaxonomyLink, GBIFLink
from dataimporter.links import (
MediaLink,
TaxonomyLink,
GBIFLink,
PreparationSpecimenLink,
)
from dataimporter.model import SourceRecord
from dataimporter.view import View, ViewLink

Expand Down Expand Up @@ -76,6 +82,7 @@ def __init__(self, config: Config):
artefact_view = ArtefactView(self.views_path / "artefact", ecatalogue_db)
indexlot_view = IndexLotView(self.views_path / "indexlot", ecatalogue_db)
specimen_view = SpecimenView(self.views_path / "specimen", ecatalogue_db)
prep_view = PreparationView(self.views_path / "preparation", ecatalogue_db)

# CREATE THE VIEW LINKS
# first artefact links
Expand Down Expand Up @@ -108,6 +115,11 @@ def __init__(self, config: Config):
self.links_path / "specimen_gbif", specimen_view, gbif_view
)

# next preparation view
preparation_specimen = PreparationSpecimenLink(
self.links_path / "preparation_specimen", prep_view, specimen_view
)

# SETUP STATE
# store all the dbs, view, and links in dicts for easy access via their names
self.dbs: Dict[str, DataDB] = {
Expand All @@ -123,6 +135,7 @@ def __init__(self, config: Config):
artefact_view,
indexlot_view,
specimen_view,
prep_view,
]
}
self.links: Dict[str, ViewLink] = {
Expand All @@ -134,6 +147,7 @@ def __init__(self, config: Config):
specimen_images,
specimen_taxonomy,
specimen_gbif,
preparation_specimen,
]
}

Expand All @@ -146,12 +160,13 @@ def __init__(self, config: Config):
"indexlot": SplitgillDatabase(config.indexlot_id, self.client),
"artefact": SplitgillDatabase(config.artefact_id, self.client),
"mss": SplitgillDatabase("mss", self.client),
"preparation": SplitgillDatabase(config.preparation_id, self.client),
}

# a database for each data db's redacted IDs to be stored in
self.redaction_database = RedactionDB(config.data_path / "redactions")

def _queue_changes(self, records: Iterable[SourceRecord], db_name: str):
def queue_changes(self, records: Iterable[SourceRecord], db_name: str):
"""
Update the records in the data DB with the given name. The views based on the DB
that is being updated will also be updated.
Expand Down Expand Up @@ -197,7 +212,7 @@ def queue_emu_changes(self):
# record refers to a potentially different table from which it is
# deleting a record
if dump.table != "eaudit":
self._queue_changes(dump.read(), dump.table)
self.queue_changes(dump.read(), dump.table)
else:
# wrap the dump stream in a filter to only allow through records we
# want to process
Expand All @@ -211,7 +226,7 @@ def queue_emu_changes(self):
):
# convert the raw audit records into delete records as we queue
# them
self._queue_changes(
self.queue_changes(
map(convert_eaudit_to_delete, records), table
)
# we've handled all the dumps from this date, update the last date stored on
Expand All @@ -223,7 +238,7 @@ def queue_gbif_changes(self):
Retrieve the latest GBIF records, check which ones have changed compared to the
ones stored in the gbif data DB, and then queue them into the GBIF view.
"""
self._queue_changes(
self.queue_changes(
get_changed_records(
self.dbs["gbif"], self.config.gbif_username, self.config.gbif_password
),
Expand Down
107 changes: 107 additions & 0 deletions dataimporter/links.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,110 @@ def clear_from_foreign(self):
Clears out the gbif (foreign) ID to occurrence ID map.
"""
self.gbif_id_map.clear()


class PreparationSpecimenLink(ViewLink):
    """
    A ViewLink joining each preparation record to the specimen voucher record
    it was created from.

    The mapping is one-to-one, with the single specimen ID sourced from the
    base prep record. When a base record is transformed, a handful of fields
    from the linked specimen (see MAPPED_SPECIMEN_FIELDS) are copied onto the
    prep data dict, essentially as a searching convenience.
    """

    # the EMu field on the prep records which links to the specimen voucher record
    SPECIMEN_ID_REF_FIELD = "EntPreSpecimenRef"
    # the Portal fields which are copied from the specimen to the prep data dict
    # TODO: missing CollEventDateVisitedFrom, CollEventName_tab, and kinda ColSite
    MAPPED_SPECIMEN_FIELDS = [
        "barcode",
        "scientificName",
        "order",
        "identifiedBy",
        # this is a ColSite substitute which uses sumPreciseLocation
        "locality",
        "decimalLatitude",
        "decimalLongitude",
    ]

    def __init__(self, path: Path, prep_view: View, specimen_view: View):
        """
        :param path: the path to store the ViewLink data in
        :param prep_view: the preparation view
        :param specimen_view: the specimen view
        """
        super().__init__(path.name, prep_view, specimen_view)
        self.path = path
        # one-to-one index mapping prep ID -> specimen ID
        self.id_map = Index(path / "id_map")

    def update_from_base(self, prep_records: List[SourceRecord]):
        """
        Pull the linked specimen ID out of each of the given prep records and
        store the (prep ID, specimen ID) pairs in the ID map.

        :param prep_records: the changed prep records
        """
        ref_field = PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD
        pairs = (
            (prep.id, linked_id)
            for prep in prep_records
            # skip preps with no (or an empty) specimen reference
            if (linked_id := prep.get_first_value(ref_field))
        )
        self.id_map.put_one_to_one(pairs)

    def update_from_foreign(self, specimen_records: List[SourceRecord]):
        """
        Propagate changes in the given specimen records to the base prep
        records linked to them.

        :param specimen_records: the updated specimen records
        """
        prep_ids = set()
        for specimen_record in specimen_records:
            prep_ids.update(self.id_map.reverse_get(specimen_record.id))

        if not prep_ids:
            return

        linked_preps = list(self.base_view.db.get_records(prep_ids))
        if linked_preps:
            # requeue the associated prep records so the base view refreshes them
            self.base_view.queue(linked_preps)

    def transform(self, prep_record: SourceRecord, data: dict):
        """
        Augment the given prep record's data dict with data from the linked
        voucher specimen, if one exists.

        :param prep_record: the prep record
        :param data: the data dict to update
        """
        voucher_id = prep_record.get_first_value(
            PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD
        )
        if not voucher_id:
            return

        specimen = self.foreign_view.get_and_transform(voucher_id)
        if specimen is None:
            return

        # from DwC
        data["associatedOccurrences"] = f"Voucher: {specimen.pop('occurrenceID')}"
        # not from DwC
        data["specimenID"] = specimen.pop("_id")
        for field in PreparationSpecimenLink.MAPPED_SPECIMEN_FIELDS:
            value = specimen.get(field)
            if value is not None:
                data[field] = value

    def clear_from_base(self):
        """
        Wipe the prep ID -> specimen ID map.
        """
        self.id_map.clear()
73 changes: 73 additions & 0 deletions tests/emu/views/test_preparation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from contextlib import closing
from pathlib import Path
from typing import List, Tuple

import pytest

from dataimporter.dbs import DataDB
from dataimporter.emu.views.utils import (
NO_PUBLISH,
INVALID_TYPE,
INVALID_GUID,
INVALID_STATUS,
INVALID_DEPARTMENT,
)
from dataimporter.model import SourceRecord
from dataimporter.view import FilterResult, SUCCESS_RESULT
from dataimporter.emu.views.preparation import PreparationView, INVALID_SUBDEPARTMENT
from tests.helpers.samples.preparation import (
SAMPLE_PREPARATION_DATA,
SAMPLE_PREPARATION_ID,
)


@pytest.fixture
def prep_view(tmp_path: Path) -> PreparationView:
    """
    Yield a PreparationView backed by a temporary DataDB, ensuring the view is
    closed once the test is done with it.
    """
    view = PreparationView(tmp_path / "prep_view", DataDB(tmp_path / "prep_data"))
    try:
        yield view
    finally:
        view.close()


# each scenario overrides exactly one field of the valid sample record to
# trigger a specific rejection; the final empty override checks that the
# untouched sample passes
is_member_scenarios: List[Tuple[dict, FilterResult]] = [
    ({"ColRecordType": "Specimen"}, INVALID_TYPE),
    ({"AdmPublishWebNoPasswordFlag": "n"}, NO_PUBLISH),
    ({"AdmGUIDPreferredValue": "not a valid guid!"}, INVALID_GUID),
    ({"SecRecordStatus": "INVALID"}, INVALID_STATUS),
    ({"ColDepartment": "DDI"}, INVALID_DEPARTMENT),
    ({"ColSubDepartment": "Informatics"}, INVALID_SUBDEPARTMENT),
    ({}, SUCCESS_RESULT),
]


@pytest.mark.parametrize("overrides, result", is_member_scenarios)
def test_is_member(overrides: dict, result: FilterResult, prep_view: PreparationView):
    """
    Check that each scenario's overridden sample record produces exactly the
    expected filter result.
    """
    modified_data = dict(SAMPLE_PREPARATION_DATA)
    modified_data.update(overrides)
    record = SourceRecord(SAMPLE_PREPARATION_ID, modified_data, "test")
    assert prep_view.is_member(record) == result


def test_transform_deleted(prep_view: PreparationView):
    """
    A record with no data counts as deleted and should transform to an empty
    dict.
    """
    deleted_record = SourceRecord(SAMPLE_PREPARATION_ID, {}, "test")
    assert deleted_record.is_deleted
    assert prep_view.transform(deleted_record) == {}


def test_make_data(prep_view: PreparationView):
    """
    make_data on the full sample record should produce exactly the expected
    Portal dict.
    """
    record = SourceRecord(SAMPLE_PREPARATION_ID, SAMPLE_PREPARATION_DATA, "test")

    expected = {
        "_id": record.id,
        "created": "2022-09-12T17:07:51+00:00",
        "modified": "2022-09-12T17:21:14+00:00",
        "project": "Darwin Tree of Life",
        "preparationNumber": "C9K02TWP_B2",
        "preparationType": "DNA Extract",
        "mediumType": None,
        "preparationProcess": None,
        "preparationContents": "**OTHER_SOMATIC_ANIMAL_TISSUE**",
        "preparationDate": "2022-05-09",
    }
    assert prep_view.make_data(record) == expected
Loading

0 comments on commit 3cc42f4

Please sign in to comment.