diff --git a/dataimporter/emu/views/preparation.py b/dataimporter/emu/views/preparation.py new file mode 100644 index 0000000..ba652c7 --- /dev/null +++ b/dataimporter/emu/views/preparation.py @@ -0,0 +1,82 @@ +from dataimporter.emu.views.utils import ( + NO_PUBLISH, + DISALLOWED_STATUSES, + DEPARTMENT_COLLECTION_CODES, + INVALID_STATUS, + INVALID_DEPARTMENT, + INVALID_TYPE, + is_web_published, + is_valid_guid, + INVALID_GUID, +) +from dataimporter.emu.views.utils import emu_date +from dataimporter.model import SourceRecord +from dataimporter.view import View, FilterResult, SUCCESS_RESULT + +INVALID_SUBDEPARTMENT = FilterResult(False, "Invalid subdepartment") + + +class PreparationView(View): + """ + View for preparation records. + + This view populates the preparation resource on the Data Portal. + """ + + def is_member(self, record: SourceRecord) -> FilterResult: + """ + Filters the given record, determining whether it should be included in the + preparation resource or not. + + :param record: the record to filter + :return: a FilterResult object + """ + if record.get_first_value("ColRecordType", default="").lower() != "preparation": + return INVALID_TYPE + + if not is_web_published(record): + return NO_PUBLISH + + if not is_valid_guid(record): + return INVALID_GUID + + if record.get_first_value("SecRecordStatus") in DISALLOWED_STATUSES: + return INVALID_STATUS + + if record.get_first_value("ColDepartment") not in DEPARTMENT_COLLECTION_CODES: + return INVALID_DEPARTMENT + + if record.get_first_value("ColSubDepartment") != "Molecular Collections": + return INVALID_SUBDEPARTMENT + + return SUCCESS_RESULT + + def make_data(self, record: SourceRecord) -> dict: + """ + Converts the record's raw data to a dict which will be the data presented on the + Data Portal. 
+ + :param record: the record to project + :return: a dict containing the data for this record that should be displayed on + the Data Portal + """ + # cache these for perf + get_all = record.get_all_values + get_first = record.get_first_value + + return { + "_id": record.id, + "created": emu_date( + get_first("AdmDateInserted"), get_first("AdmTimeInserted") + ), + "modified": emu_date( + get_first("AdmDateModified"), get_first("AdmTimeModified") + ), + "project": get_all("NhmSecProjectName"), + "preparationNumber": get_first("EntPreNumber"), + "preparationType": get_first("EntPrePreparationKind"), + "mediumType": get_first("EntPreStorageMedium"), + "preparationProcess": get_first("EntPrePreparationMethod"), + "preparationContents": get_first("EntPreContents"), + "preparationDate": get_first("EntPreDate"), + } diff --git a/dataimporter/importer.py b/dataimporter/importer.py index 08bffb7..56fade3 100644 --- a/dataimporter/importer.py +++ b/dataimporter/importer.py @@ -20,10 +20,16 @@ from dataimporter.emu.views.image import ImageView from dataimporter.emu.views.indexlot import IndexLotView from dataimporter.emu.views.mss import MSSView +from dataimporter.emu.views.preparation import PreparationView from dataimporter.emu.views.specimen import SpecimenView from dataimporter.emu.views.taxonomy import TaxonomyView from dataimporter.ext.gbif import GBIFView, get_changed_records -from dataimporter.links import MediaLink, TaxonomyLink, GBIFLink +from dataimporter.links import ( + MediaLink, + TaxonomyLink, + GBIFLink, + PreparationSpecimenLink, +) from dataimporter.model import SourceRecord from dataimporter.view import View, ViewLink @@ -76,6 +82,7 @@ def __init__(self, config: Config): artefact_view = ArtefactView(self.views_path / "artefact", ecatalogue_db) indexlot_view = IndexLotView(self.views_path / "indexlot", ecatalogue_db) specimen_view = SpecimenView(self.views_path / "specimen", ecatalogue_db) + prep_view = PreparationView(self.views_path / "preparation", ecatalogue_db) # CREATE THE VIEW LINKS # first artefact links @@ -108,6 +115,11 @@ def __init__(self, config: Config): self.links_path / "specimen_gbif", specimen_view, gbif_view ) + # next preparation view + preparation_specimen = PreparationSpecimenLink( + self.links_path / "preparation_specimen", prep_view, specimen_view + ) + # SETUP STATE # store all the dbs, view, and links in dicts for easy access via their names self.dbs: Dict[str, DataDB] = { @@ -123,6 +135,7 @@ def __init__(self, config: Config): artefact_view, indexlot_view, specimen_view, + prep_view, ] } self.links: Dict[str, ViewLink] = { @@ -134,6 +147,7 @@ def __init__(self, config: Config): specimen_images, specimen_taxonomy, specimen_gbif, + preparation_specimen, ] } @@ -146,12 +160,13 @@ def __init__(self, config: Config): "indexlot": SplitgillDatabase(config.indexlot_id, self.client), "artefact": SplitgillDatabase(config.artefact_id, self.client), "mss": SplitgillDatabase("mss", self.client), + "preparation": SplitgillDatabase(config.preparation_id, self.client), } # a database for each data db's redacted IDs to be stored in self.redaction_database = RedactionDB(config.data_path / "redactions") - def _queue_changes(self, records: Iterable[SourceRecord], db_name: str): + def queue_changes(self, records: Iterable[SourceRecord], db_name: str): """ Update the records in the data DB with the given name. The views based on the DB that is being updated will also be updated. 
@@ -197,7 +212,7 @@ def queue_emu_changes(self): # record refers to a potentially different table from which it is # deleting a record if dump.table != "eaudit": - self._queue_changes(dump.read(), dump.table) + self.queue_changes(dump.read(), dump.table) else: # wrap the dump stream in a filter to only allow through records we # want to process @@ -211,7 +226,7 @@ def queue_emu_changes(self): ): # convert the raw audit records into delete records as we queue # them - self._queue_changes( + self.queue_changes( map(convert_eaudit_to_delete, records), table ) # we've handled all the dumps from this date, update the last date stored on @@ -223,7 +238,7 @@ def queue_gbif_changes(self): Retrieve the latest GBIF records, check which ones have changed compared to the ones stored in the gbif data DB, and then queue them into the GBIF view. """ - self._queue_changes( + self.queue_changes( get_changed_records( self.dbs["gbif"], self.config.gbif_username, self.config.gbif_password ), diff --git a/dataimporter/links.py b/dataimporter/links.py index 46bdefc..bfb4bb4 100644 --- a/dataimporter/links.py +++ b/dataimporter/links.py @@ -291,3 +291,110 @@ def clear_from_foreign(self): Clears out the gbif (foreign) ID to occurrence ID map. """ self.gbif_id_map.clear() + + +class PreparationSpecimenLink(ViewLink): + """ + A ViewLink representing the link between a preparation record and the specimen + voucher record it was created from. + + The mapping is one-to-one with exactly one ID sourced from the base prep record. + When transforming the base record using the linked specimen record, we copy some + fields from the specimen record over to the base prep record, essentially for + searching convenience. The full list of fields that are copied is below. + """ + + # the EMu field on the prep records which links to the specimen voucher record + SPECIMEN_ID_REF_FIELD = "EntPreSpecimenRef" + # the Portal fields which are copied from the specimen to the prep data dict + # TODO: missing CollEventDateVisitedFrom, CollEventName_tab, and kinda ColSite + MAPPED_SPECIMEN_FIELDS = [ + "barcode", + "scientificName", + "order", + "identifiedBy", + # this is a ColSite substitute which uses sumPreciseLocation + "locality", + "decimalLatitude", + "decimalLongitude", + ] + + def __init__(self, path: Path, prep_view: View, specimen_view: View): + """ + :param path: the path to store the ViewLink data in + :param prep_view: the preparation view + :param specimen_view: the specimen view + """ + super().__init__(path.name, prep_view, specimen_view) + self.path = path + # a one-to-one index from prep id -> specimen id + self.id_map = Index(path / "id_map") + + def update_from_base(self, prep_records: List[SourceRecord]): + """ + Extracts the linked specimen ID from each of the given prep records and adds + them to the ID map. + + :param prep_records: the changed prep records + """ + self.id_map.put_one_to_one( + (prep_record.id, specimen_id) + for prep_record in prep_records + if ( + specimen_id := prep_record.get_first_value( + PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD + ) + ) + ) + + def update_from_foreign(self, specimen_records: List[SourceRecord]): + """ + Propagate the changes in the given specimen records to the base prep records + linked to them. 
+ + :param specimen_records: the updated specimen records + """ + base_ids = { + base_id + for specimen_record in specimen_records + for base_id in self.id_map.reverse_get(specimen_record.id) + } + + if base_ids: + base_records = list(self.base_view.db.get_records(base_ids)) + if base_records: + # if there are associated base records, queue changes to them on the + # base view + self.base_view.queue(base_records) + + def transform(self, prep_record: SourceRecord, data: dict): + """ + Transform the given prep record's data with data from the linked voucher + specimen, if one exists. + + :param prep_record: the prep record + :param data: the data dict to update + """ + specimen_id = prep_record.get_first_value( + PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD + ) + if specimen_id: + specimen = self.foreign_view.get_and_transform(specimen_id) + if specimen is not None: + # from DwC + data[ + "associatedOccurrences" + ] = f"Voucher: {specimen.pop('occurrenceID')}" + # not from DwC + data["specimenID"] = specimen.pop("_id") + data.update( + (field, value) + for field in PreparationSpecimenLink.MAPPED_SPECIMEN_FIELDS + if (value := specimen.get(field)) is not None + ) + + def clear_from_base(self): + """ + Clears out the ID map. + """ + self.id_map.clear() diff --git a/tests/emu/views/test_preparation.py b/tests/emu/views/test_preparation.py new file mode 100644 index 0000000..f79a531 --- /dev/null +++ b/tests/emu/views/test_preparation.py @@ -0,0 +1,73 @@ +from contextlib import closing +from pathlib import Path +from typing import List, Tuple + +import pytest + +from dataimporter.dbs import DataDB +from dataimporter.emu.views.utils import ( + NO_PUBLISH, + INVALID_TYPE, + INVALID_GUID, + INVALID_STATUS, + INVALID_DEPARTMENT, +) +from dataimporter.model import SourceRecord +from dataimporter.view import FilterResult, SUCCESS_RESULT +from dataimporter.emu.views.preparation import PreparationView, INVALID_SUBDEPARTMENT +from tests.helpers.samples.preparation import ( + SAMPLE_PREPARATION_DATA, + SAMPLE_PREPARATION_ID, +) + + +@pytest.fixture +def prep_view(tmp_path: Path) -> PreparationView: + with closing( + PreparationView(tmp_path / "prep_view", DataDB(tmp_path / "prep_data")) + ) as view: + yield view + + +is_member_scenarios: List[Tuple[dict, FilterResult]] = [ + ({"ColRecordType": "Specimen"}, INVALID_TYPE), + ({"AdmPublishWebNoPasswordFlag": "n"}, NO_PUBLISH), + ({"AdmGUIDPreferredValue": "not a valid guid!"}, INVALID_GUID), + ({"SecRecordStatus": "INVALID"}, INVALID_STATUS), + ({"ColDepartment": "DDI"}, INVALID_DEPARTMENT), + ({"ColSubDepartment": "Informatics"}, INVALID_SUBDEPARTMENT), + ({}, SUCCESS_RESULT), +] + + +@pytest.mark.parametrize("overrides, result", is_member_scenarios) +def test_is_member(overrides: dict, result: FilterResult, prep_view: PreparationView): + data = {**SAMPLE_PREPARATION_DATA, **overrides} + record = SourceRecord(SAMPLE_PREPARATION_ID, data, "test") + assert prep_view.is_member(record) == result + + +def test_transform_deleted(prep_view: PreparationView): + record = SourceRecord(SAMPLE_PREPARATION_ID, {}, "test") + assert record.is_deleted + + data = prep_view.transform(record) + assert data == {} + + +def test_make_data(prep_view: PreparationView): + record = SourceRecord(SAMPLE_PREPARATION_ID, SAMPLE_PREPARATION_DATA, "test") + + data = prep_view.make_data(record) + assert data == { + "_id": record.id, + "created": "2022-09-12T17:07:51+00:00", + "modified": "2022-09-12T17:21:14+00:00", + "project": "Darwin Tree of Life", + "preparationNumber": "C9K02TWP_B2", 
+ "preparationType": "DNA Extract", + "mediumType": None, + "preparationProcess": None, + "preparationContents": "**OTHER_SOMATIC_ANIMAL_TISSUE**", + "preparationDate": "2022-05-09", + } diff --git a/tests/helpers/samples/dumps.py b/tests/helpers/samples/dumps.py index 98196ae..1f2d6fa 100644 --- a/tests/helpers/samples/dumps.py +++ b/tests/helpers/samples/dumps.py @@ -1,116 +1,56 @@ -import gzip -import shutil -from itertools import count -from pathlib import Path -from typing import List, Dict - -ECATALOGUE_ARTEFACT_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "ecatalogue_artefact_53_sample.gz" -) -ECATALOGUE_INDEXLOT_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "ecatalogue_indexlot_2000_sample.gz" -) -ECATALOGUE_SPECIMEN_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "ecatalogue_specimen_10000_sample.gz" -) - -EMULTIMEDIA_ARTEFACT_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "emultimedia_artefact_565_sample.gz" -) -EMULTIMEDIA_INDEXLOT_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "emultimedia_indexlot_406_sample.gz" -) -EMULTIMEDIA_SPECIMEN_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "emultimedia_specimen_11271_sample.gz" -) - -ETAXONOMY_ARTEFACT_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "etaxonomy_artefact_1_sample.gz" -) -ETAXONOMY_INDEXLOT_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "etaxonomy_indexlot_1880_sample.gz" -) -ETAXONOMY_SPECIMEN_SAMPLE_DUMP = ( - Path(__file__).parent / "emu" / "etaxonomy_specimen_1_sample.gz" -) - - -def create_ecatalogue_dump( - path: Path, - date: str, - include_artefacts: bool = True, - include_indexlots: bool = True, - include_specimens: bool = True, -): - dump_file = path / f"ecatalogue.export.{date}.gz" - dumps = [] - if include_artefacts: - dumps.append(ECATALOGUE_ARTEFACT_SAMPLE_DUMP) - if include_indexlots: - dumps.append(ECATALOGUE_INDEXLOT_SAMPLE_DUMP) - if include_specimens: - dumps.append(ECATALOGUE_SPECIMEN_SAMPLE_DUMP) - - with dump_file.open("wb") as g: - for dump in dumps: - with dump.open("rb") as f: - shutil.copyfileobj(f, g) - - -def create_emultimedia_dump( - path: Path, - date: str, - include_artefacts: bool = True, - include_indexlots: bool = True, - include_specimens: bool = True, -): - dump_file = path / f"emultimedia.export.{date}.gz" - dumps = [] - if include_artefacts: - dumps.append(EMULTIMEDIA_ARTEFACT_SAMPLE_DUMP) - if include_indexlots: - dumps.append(EMULTIMEDIA_INDEXLOT_SAMPLE_DUMP) - if include_specimens: - dumps.append(EMULTIMEDIA_SPECIMEN_SAMPLE_DUMP) - - with dump_file.open("wb") as g: - for dump in dumps: - with dump.open("rb") as f: - shutil.copyfileobj(f, g) - - -def create_etaxonomy_dump( - path: Path, - date: str, - include_artefacts: bool = True, - include_indexlots: bool = True, - include_specimens: bool = True, -): - dump_file = path / f"etaxonomy.export.{date}.gz" - dumps = [] - if include_artefacts: - dumps.append(ETAXONOMY_ARTEFACT_SAMPLE_DUMP) - if include_indexlots: - dumps.append(ETAXONOMY_INDEXLOT_SAMPLE_DUMP) - if include_specimens: - dumps.append(ETAXONOMY_SPECIMEN_SAMPLE_DUMP) - - with dump_file.open("wb") as g: - for dump in dumps: - with dump.open("rb") as f: - shutil.copyfileobj(f, g) - - -def create_eaudit_dump(path: Path, irns_to_delete: Dict[str, List[str]], date: str): - dump_file = path / f"eaudit.deleted-export.{date}.gz" - - irn_generator = count(1) - - with gzip.GzipFile(dump_file, "wb") as g: - for table, irns in irns_to_delete.items(): - for irn in irns: - g.write(f"irn:1={next(irn_generator)}\n".encode("utf-8")) - 
g.write(f"AudOperation:1=delete\n".encode("utf-8")) - g.write(f"AudTable:1={table}\n".encode("utf-8")) - g.write(f"AudKey:1={irn}\n".encode("utf-8")) - g.write("###\n".encode("utf-8")) +from enum import Enum +from typing import Optional +from uuid import uuid4 + + +class EcatalogueType(Enum): + specimen = "Specimen" + indexlot = "Index Lot" + artefact = "Artefact" + preparation = "Preparation" + + +def create_ecatalogue( + irn: str, ecatalogue_type: EcatalogueType, guid: Optional[str] = None, **extras +) -> dict: + base = { + "irn": irn, + "ColRecordType": ecatalogue_type.value, + "AdmPublishWebNoPasswordFlag": "Y", + "AdmGUIDPreferredValue": guid if guid is not None else str(uuid4()), + "ColDepartment": "Entomology", + } + if ecatalogue_type == EcatalogueType.preparation: + base["ColSubDepartment"] = "Molecular Collections" + base.update(extras) + return base + + +def create_emultimedia(irn: str, guid: Optional[str] = None, **extras): + return { + "irn": irn, + "MulMimeType": "image", + "AdmGUIDPreferredValue": guid if guid is not None else str(uuid4()), + "AdmPublishWebNoPasswordFlag": "Y", + # image doesn't need this, but MSS does so might as well include it + "DocIdentifier": "banana.jpg", + **extras, + } + + +def create_etaxonomy(irn: str, **extras): + return { + "irn": irn, + "AdmPublishWebNoPasswordFlag": "Y", + **extras, + } + + +def create_eaudit(irn_to_delete: str, table_to_delete_from: str) -> dict: + return { + # doesn't matter what the irn of this record is so just always set it to -1 + "irn": "-1", + "AudOperation": "delete", + "AudTable": table_to_delete_from, + "AudKey": irn_to_delete, + } diff --git a/tests/helpers/samples/preparation.py b/tests/helpers/samples/preparation.py new file mode 100644 index 0000000..bc1ae08 --- /dev/null +++ b/tests/helpers/samples/preparation.py @@ -0,0 +1,73 @@ +from tests.helpers.samples.specimen import SAMPLE_SPECIMEN_ID +from tests.helpers.samples.utils import read_emu_extract + +# this is taken from ecatalogue.export.20231008.gz but with the EntPreSpecimenRef field +# replaced with a single reference to the SAMPLE_SPECIMEN_ID +raw_data = f""" +rownum=3645 +irn:1=9968955 +SummaryData:1=no Collection Kind for preparation (irn 9968955) +ExtendedData:1=9968955 +ExtendedData:2= +ExtendedData:3=no Collection Kind for preparation (irn 9968955) +ColDepartment:1=Zoology +ColSubDepartment:1=Molecular Collections +ColRecordType:1=Preparation +GeneralCatalogueNumber:1=irn: 9968955 +EntIdeQualifiedNameAutomatic:1=Yes +EntPreSpecimenRef:1={SAMPLE_SPECIMEN_ID} +EntPreSpecimenRefLocal:1={SAMPLE_SPECIMEN_ID} +EntPreSpecimenTaxonLocal:1=Eurythenes maldoror d'Udekem d'Acoz & Havermans, 2015 -- Eurytheneidae; Amphipoda; Malacostraca +EntPreSpecimenTaxonLocalRef:1=790675 +EntPreCatalogueNumberLocal:1=014453676 +EntPreContents:1=**OTHER_SOMATIC_ANIMAL_TISSUE** +EntPrePreparationKind:1=DNA Extract +EntPrePreparatorRef:1=406667 +EntPrePreparatorRefLocal:1=406667 +EntPrePreparatorSumDataLocal:1=Chris Fletcher; Natural History Museum; Life Sciences; Fletcher +EntPreDate=2022-05-09 +EntPreNumber:1=C9K02TWP_B2 +EntPreTaxonSummaryDataLocal:1=Eurythenes maldoror d'Udekem d'Acoz & Havermans, 2015 -- Eurytheneidae; Amphipoda; Malacostraca +EntPreSpecimenCatNumLocal:1=014453676 +EntPreSpecimenPresLocal:1=Dry frozen (-80°C) +AcqHistoric:1=No +LocIndependentlyMoveable:1=Yes +AcqLegTransferOfTitle:1=No +AcqLegPurAgree:1=No +AcqLegConfirmationOfGift:1=No +AcqLegDueDilligence:1=No +AcqLegCollectionImpact:1=No +NteText0:1=S +NteText1:1=Purpose of specimen: DNA barcoding only 
+NteText2:1=Pleopod +NteType:1=Size +AdmPublishWebNoPasswordFlag:1=Y +AdmPublishWebNoPassword:1=Yes +AdmPublishWebPasswordFlag:1=Y +AdmPublishWebPassword:1=Yes +AdmGUIDPreferredType:1=UUID4 +AdmGUIDPreferredValue:1=f11c9c35-4da5-45e5-9dbb-6f8f55b26aa7 +AdmGUIDIsPreferred:1=Yes +AdmGUIDType:1=UUID4 +AdmGUIDValue:1=f11c9c35-4da5-45e5-9dbb-6f8f55b26aa7 +AdmInsertedBy:1=Heather Allen +AdmDateInserted=2022-09-12 +AdmImportIdentifier:1=12092022_JC231_Prep +AdmTimeInserted=17:07:51.000 +AdmSystemIdentifier:1=heata2-220912-1706 +AdmModifiedBy:1=Heather Allen +AdmDateModified=2022-09-12 +AdmTimeModified=17:21:14.000 +AdmDateRecordModified=2023-10-06 +AdmTimeRecordModified=15:01:03.000 +SecRecordStatus:1=Active +SecCanDisplay:1=Group Default +SecCanEdit:1=Group Default +SecCanDelete:1=Group Default +SecDepartment:1=Entomology +SecLookupRoot:1=Entomology +NhmSecOpenDataPolicyException:1=none +NhmSecProjectName:1=Darwin Tree of Life +""" + +SAMPLE_PREPARATION_ID, SAMPLE_PREPARATION_DATA = read_emu_extract(raw_data) diff --git a/tests/test_importer.py b/tests/test_importer.py index bfb6826..ab604fd 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -3,19 +3,21 @@ from unittest.mock import patch, MagicMock import pytest +from elasticsearch_dsl import Search from freezegun import freeze_time -from splitgill.manager import SplitgillDatabase from splitgill.utils import to_timestamp from dataimporter.config import Config, MongoConfig, ElasticsearchConfig from dataimporter.emu.dumps import FIRST_VERSION from dataimporter.importer import DataImporter, EMuStatus from dataimporter.model import SourceRecord +from tests.helpers.dumps import create_dump from tests.helpers.samples.dumps import ( - create_ecatalogue_dump, - create_emultimedia_dump, - create_etaxonomy_dump, - create_eaudit_dump, + create_ecatalogue, + EcatalogueType, + create_emultimedia, + create_etaxonomy, + create_eaudit, ) @@ -29,6 +31,7 @@ def config(tmp_path: Path) -> Config: specimen_id="specimen-id", artefact_id="artefact-id", indexlot_id="indexlot-id", + preparation_id="preparation-id", iiif_base_url="https://not.a.real.domain.com/media", mongo_config=mongo_config, es_config=elasticsearch_config, @@ -57,70 +60,154 @@ def test_init(self, config: Config): assert "artefact" in importer.views assert "indexlot" in importer.views assert "specimen" in importer.views + assert "preparation" in importer.views + + def check_view_link(name): + assert name in importer.links + base_name, foreign_name = name.split("_") + assert importer.links[name].base_view is importer.views[base_name] + assert importer.links[name].foreign_view is importer.views[foreign_name] # check that the view links we expect are created - assert "artefact_image" in importer.links - assert "indexlot_image" in importer.links - assert "indexlot_taxonomy" in importer.links - assert "specimen_image" in importer.links - assert "specimen_taxonomy" in importer.links - assert "specimen_gbif" in importer.links + check_view_link("artefact_image") + check_view_link("indexlot_image") + check_view_link("indexlot_taxonomy") + check_view_link("specimen_image") + check_view_link("specimen_taxonomy") + check_view_link("specimen_gbif") + check_view_link("preparation_specimen") # check that the Splitgill databases we expect are created assert "specimen" in importer.sg_dbs assert "indexlot" in importer.sg_dbs assert "artefact" in importer.sg_dbs assert "mss" in importer.sg_dbs + assert "preparation" in importer.sg_dbs importer.close() def test_queue_emu_changes(self, config: Config): - 
config.dumps_path.mkdir(exist_ok=True) - date_1 = "20230905" - create_ecatalogue_dump(config.dumps_path, date_1) - create_emultimedia_dump(config.dumps_path, date_1) - create_etaxonomy_dump(config.dumps_path, date_1) - - with DataImporter(config) as importer: - importer.queue_emu_changes() - - assert importer.dbs["ecatalogue"].size() == 53 + 2000 + 10000 - assert importer.dbs["emultimedia"].size() == 12242 - assert importer.dbs["etaxonomy"].size() == 1879 - - assert importer.views["specimen"].changes.size() == 10000 - assert importer.views["indexlot"].changes.size() == 2000 - assert importer.views["artefact"].changes.size() == 53 - assert importer.views["image"].changes.size() == 12195 - assert importer.views["mss"].changes.size() == 12195 - - # flush all the queues - for view in importer.views.values(): - view.flush() - assert view.changes.size() == 0 - - # now create an audit dump with one image deleted which is associated with 4 - # index lots, and one artefact deleted - indexlot_image_irn_to_delete = "4712705" - artefact_irn_to_delete = "2475123" - create_eaudit_dump( - config.dumps_path, - { - "emultimedia": [indexlot_image_irn_to_delete], - "ecatalogue": [artefact_irn_to_delete], - }, - "20231005", - ) - - importer.queue_emu_changes() + importer = DataImporter(config) - # the deleted image should be in the image queue - assert importer.views["image"].changes.size() == 1 - # the deleted image should be propagated to the 4 index lots that reference - # it, plus the deleted artefact will be queued here too - assert importer.views["indexlot"].changes.size() == 5 - # the deleted artefact should be in the artefact queue - assert importer.views["artefact"].changes.size() == 1 + first_dump_date = date(2023, 10, 3) + # create an ecatalogue dump with one record per view + create_dump( + config.dumps_path, + "ecatalogue", + first_dump_date, + create_ecatalogue( + "1", EcatalogueType.specimen, MulMultiMediaRef="1", CardParasiteRef="1" + ), + create_ecatalogue( + "2", + EcatalogueType.indexlot, + MulMultiMediaRef="2", + EntIndIndexLotTaxonNameLocalRef="2", + ), + create_ecatalogue("3", EcatalogueType.artefact, MulMultiMediaRef="3"), + create_ecatalogue("4", EcatalogueType.preparation, EntPreSpecimenRef="1"), + ) + # create an emultimedia dump with 3 images each with an ID that matches the + # linked IDs above in the ecatalogue dump via the MulMultiMediaRef field + create_dump( + config.dumps_path, + "emultimedia", + first_dump_date, + create_emultimedia("1"), + create_emultimedia("2"), + create_emultimedia("3"), + ) + # create an etaxonomy dump with 2 records one matching the specimen made above + # and one matching the index lot + create_dump( + config.dumps_path, + "etaxonomy", + first_dump_date, + create_etaxonomy("1"), + create_etaxonomy("2"), + ) + + importer.queue_emu_changes() + + assert importer.dbs["ecatalogue"].size() == 4 + assert importer.dbs["emultimedia"].size() == 3 + assert importer.dbs["etaxonomy"].size() == 2 + assert importer.views["specimen"].changes.size() == 1 + assert importer.views["indexlot"].changes.size() == 1 + assert importer.views["artefact"].changes.size() == 1 + assert importer.views["preparation"].changes.size() == 1 + assert importer.views["image"].changes.size() == 3 + assert importer.views["mss"].changes.size() == 3 + + # flush all the view queues + for view in importer.views.values(): + view.flush() + assert view.changes.size() == 0 + + second_dump_date = date(2023, 10, 4) + create_dump( + config.dumps_path, + "eaudit", + second_dump_date, + # delete the index 
lot + create_eaudit("2", "ecatalogue"), + # delete the media on the artefact + create_eaudit("3", "emultimedia"), + # delete the taxonomy of the specimen + create_eaudit("1", "etaxonomy"), + ) + + importer.queue_emu_changes() + + # these should all be the same + assert importer.dbs["ecatalogue"].size() == 4 + assert importer.dbs["emultimedia"].size() == 3 + assert importer.dbs["etaxonomy"].size() == 2 + # 1 indexlot delete + specimen update because of the taxonomy delete + assert importer.views["specimen"].changes.size() == 2 + # 1 indexlot delete + assert importer.views["indexlot"].changes.size() == 1 + # 1 indexlot delete + artefact update because of the multimedia delete + assert importer.views["artefact"].changes.size() == 2 + # 1 indexlot delete, 1 specimen change by taxonomy change which is pushed down + assert importer.views["preparation"].changes.size() == 2 + # 1 multimedia delete + assert importer.views["image"].changes.size() == 1 + # 1 multimedia delete + assert importer.views["mss"].changes.size() == 1 + + for view in importer.views.values(): + view.flush() + assert view.changes.size() == 0 + + third_dump_date = date(2023, 10, 8) + # update all the multimedia records + a new record unlinked to anything + create_dump( + config.dumps_path, + "emultimedia", + third_dump_date, + create_emultimedia("1"), + create_emultimedia("2"), + create_emultimedia("3"), + create_emultimedia("4"), + ) + + importer.queue_emu_changes() + + assert importer.dbs["ecatalogue"].size() == 4 + # there's a new emultimedia record now + assert importer.dbs["emultimedia"].size() == 4 + assert importer.dbs["etaxonomy"].size() == 2 + # an image update on an associated image, so 1 + assert importer.views["specimen"].changes.size() == 1 + # an image update on an associated image, so 1 + assert importer.views["indexlot"].changes.size() == 1 + # an image update on an associated image, so 1 + assert importer.views["artefact"].changes.size() == 1 + # an image update on an associated specimen's image, so 1 + assert importer.views["preparation"].changes.size() == 1 + assert importer.views["image"].changes.size() == 4 + assert importer.views["mss"].changes.size() == 4 def test_queue_gbif_changes(self, config: Config): gbif_records = [ @@ -137,29 +224,33 @@ def test_queue_gbif_changes(self, config: Config): assert importer.views["gbif"].changes.size() == 3 @freeze_time("2023-10-20 11:04:31") - @pytest.mark.usefixtures("reset_mongo") - @pytest.mark.parametrize( - ("name", "count"), - [("artefact", 53), ("indexlot", 2000), ("specimen", 10000), ("mss", 12195)], - ) - def test_add_to_mongo(self, name: str, count: int, config: Config): - config.dumps_path.mkdir(exist_ok=True) - dump_date = "20230905" - - if name == "mss": - # just use emultimedia dumps for the mss view - create_emultimedia_dump(config.dumps_path, dump_date) - else: - # for the other views, only use the data associated with each view, makes - # things faster - dump_options = { - "include_artefacts": name == "artefact", - "include_indexlots": name == "indexlot", - "include_specimens": name == "specimen", - } - create_ecatalogue_dump(config.dumps_path, dump_date, **dump_options) - create_emultimedia_dump(config.dumps_path, dump_date, **dump_options) - create_etaxonomy_dump(config.dumps_path, dump_date, **dump_options) + @pytest.mark.usefixtures("reset_mongo", "reset_elasticsearch") + def test_add_to_mongo_and_sync_to_elasticsearch_artefact(self, config: Config): + name = "artefact" + # before the frozen time + dump_date = date(2023, 10, 3) + # create an 
ecatalogue dump with 8 artefacts + create_dump( + config.dumps_path, + "ecatalogue", + dump_date, + *[ + create_ecatalogue( + str(i), + EcatalogueType[name], + MulMultiMediaRef=str(i), + PalArtObjectName=f"{i} beans", + ) + for i in range(1, 9) + ], + ) + # create an emultimedia dump with 8 images + create_dump( + config.dumps_path, + "emultimedia", + dump_date, + *[create_emultimedia(str(i), MulTitle=f"image {i}") for i in range(1, 9)], + ) with DataImporter(config) as importer: importer.queue_emu_changes() @@ -167,58 +258,319 @@ def test_add_to_mongo(self, name: str, count: int, config: Config): importer.add_to_mongo(name) sg_db = importer.sg_dbs[name] + assert sg_db.get_mongo_version() == to_timestamp( + datetime(2023, 10, 20, 11, 4, 31) + ) + assert sg_db.data_collection.count_documents({}) == 8 + + # having parallel=True seems to break in testing, maybe it's something to do + # with the test setup or something to do with pytest, who knows + importer.sync_to_elasticsearch(name, parallel=False) + + assert sg_db.get_elasticsearch_version() == to_timestamp( + datetime(2023, 10, 20, 11, 4, 31) + ) + + search_base = Search( + using=config.get_elasticsearch_client(), index=sg_db.latest_index_name + ) + assert search_base.count() == 8 + assert ( + search_base.filter( + "term", **{"parsed.artefactName.k": "3 beans"} + ).count() + == 1 + ) + # this comes from the image + assert ( + search_base.filter( + "term", **{"parsed.associatedMedia.title.k": "image 4"} + ).count() + == 1 + ) + + @freeze_time("2023-10-20 11:04:31") + @pytest.mark.usefixtures("reset_mongo", "reset_elasticsearch") + def test_add_to_mongo_and_sync_to_elasticsearch_indexlot(self, config: Config): + name = "indexlot" + # before the frozen time + dump_date = date(2023, 10, 3) + # create an ecatalogue dump with 8 indexlots + create_dump( + config.dumps_path, + "ecatalogue", + dump_date, + *[ + create_ecatalogue( + str(i), + EcatalogueType[name], + MulMultiMediaRef=str(i), + EntIndIndexLotTaxonNameLocalRef=str(i), + EntIndMaterial=f"{i} lemons", + ) + for i in range(1, 9) + ], + ) + # create an emultimedia dump with 8 images + create_dump( + config.dumps_path, + "emultimedia", + dump_date, + *[create_emultimedia(str(i), MulTitle=f"image {i}") for i in range(1, 9)], + ) + # create an etaxonomy dump with 8 records + create_dump( + config.dumps_path, + "etaxonomy", + dump_date, + *[create_etaxonomy(str(i), ClaKingdom=f"kingdom {i}") for i in range(1, 9)], + ) + + with DataImporter(config) as importer: + importer.queue_emu_changes() + + importer.add_to_mongo(name) + sg_db = importer.sg_dbs[name] assert sg_db.get_mongo_version() == to_timestamp( datetime(2023, 10, 20, 11, 4, 31) ) - assert sg_db.data_collection.count_documents({}) == count + assert sg_db.data_collection.count_documents({}) == 8 + + # having parallel=True seems to break in testing, maybe it's something to do + # with the test setup or something to do with pytest, who knows + importer.sync_to_elasticsearch(name, parallel=False) + + assert sg_db.get_elasticsearch_version() == to_timestamp( + datetime(2023, 10, 20, 11, 4, 31) + ) + search_base = Search( + using=config.get_elasticsearch_client(), index=sg_db.latest_index_name + ) + assert search_base.count() == 8 + assert ( + search_base.filter("term", **{"parsed.material.k": "3 lemons"}).count() + == 1 + ) + # this comes from the image + assert ( + search_base.filter( + "term", **{"parsed.associatedMedia.title.k": "image 4"} + ).count() + == 1 + ) + # this comes from the taxonomy + assert ( + search_base.filter("term", 
**{"parsed.kingdom.k": "kingdom 4"}).count() + == 1 + ) @freeze_time("2023-10-20 11:04:31") @pytest.mark.usefixtures("reset_mongo", "reset_elasticsearch") - @pytest.mark.parametrize( - ("name", "count"), - [("artefact", 53), ("indexlot", 2000), ("specimen", 10000), ("mss", 12195)], - ) - def test_sync_to_elasticsearch(self, name: str, count: int, config: Config): - config.dumps_path.mkdir(exist_ok=True) - dump_date = "20230905" - - # setup the EMu dumps we're going to use - if name == "mss": - # just use emultimedia dumps for the mss view - create_emultimedia_dump(config.dumps_path, dump_date) - else: - # for the other views, only use the data associated with each view, makes - # things faster - dump_options = { - "include_artefacts": name == "artefact", - "include_indexlots": name == "indexlot", - "include_specimens": name == "specimen", - } - create_ecatalogue_dump(config.dumps_path, dump_date, **dump_options) - create_emultimedia_dump(config.dumps_path, dump_date, **dump_options) - create_etaxonomy_dump(config.dumps_path, dump_date, **dump_options) + def test_add_to_mongo_and_sync_to_elasticsearch_specimen(self, config: Config): + name = "specimen" + # before the frozen time + dump_date = date(2023, 10, 3) + # create an ecatalogue dump with 8 specimens + create_dump( + config.dumps_path, + "ecatalogue", + dump_date, + *[ + create_ecatalogue( + str(i), + EcatalogueType[name], + MulMultiMediaRef=str(i), + CardParasiteRef=str(i), + sumPreciseLocation=f"{i} Number Road", + ) + for i in range(1, 9) + ], + ) + # create an emultimedia dump with 8 images + create_dump( + config.dumps_path, + "emultimedia", + dump_date, + *[create_emultimedia(str(i), MulTitle=f"image {i}") for i in range(1, 9)], + ) + # create an etaxonomy dump with 8 records + create_dump( + config.dumps_path, + "etaxonomy", + dump_date, + *[create_etaxonomy(str(i), ClaKingdom=f"kingdom {i}") for i in range(1, 9)], + ) + + with DataImporter(config) as importer: + importer.queue_emu_changes() + + importer.add_to_mongo(name) + + sg_db = importer.sg_dbs[name] + assert sg_db.get_mongo_version() == to_timestamp( + datetime(2023, 10, 20, 11, 4, 31) + ) + assert sg_db.data_collection.count_documents({}) == 8 + + # having parallel=True seems to break in testing, maybe it's something to do + # with the test setup or something to do with pytest, who knows + importer.sync_to_elasticsearch(name, parallel=False) + + assert sg_db.get_elasticsearch_version() == to_timestamp( + datetime(2023, 10, 20, 11, 4, 31) + ) + search_base = Search( + using=config.get_elasticsearch_client(), index=sg_db.latest_index_name + ) + assert search_base.count() == 8 + assert ( + search_base.filter( + "term", **{"parsed.locality.k": "3 Number Road"} + ).count() + == 1 + ) + # this comes from the image + assert ( + search_base.filter( + "term", **{"parsed.associatedMedia.title.k": "image 4"} + ).count() + == 1 + ) + # this comes from the taxonomy + assert ( + search_base.filter("term", **{"parsed.kingdom.k": "kingdom 4"}).count() + == 1 + ) + + @freeze_time("2023-10-20 11:04:31") + @pytest.mark.usefixtures("reset_mongo", "reset_elasticsearch") + def test_add_to_mongo_and_sync_to_elasticsearch_mss(self, config: Config): + name = "mss" + # before the frozen time + dump_date = date(2023, 10, 3) + # create an emultimedia dump with 8 images + create_dump( + config.dumps_path, + "emultimedia", + dump_date, + *[ + create_emultimedia(str(i), DocIdentifier=f"banana-{i}.jpg") + for i in range(1, 9) + ], + ) with DataImporter(config) as importer: - # queue the changes from 
the dumps importer.queue_emu_changes() - # add the data to mongo + importer.add_to_mongo(name) + sg_db = importer.sg_dbs[name] + assert sg_db.get_mongo_version() == to_timestamp( + datetime(2023, 10, 20, 11, 4, 31) + ) + assert sg_db.data_collection.count_documents({}) == 8 + # having parallel=True seems to break in testing, maybe it's something to do # with the test setup or something to do with pytest, who knows importer.sync_to_elasticsearch(name, parallel=False) - sg_db: SplitgillDatabase = importer.sg_dbs[name] + assert sg_db.get_elasticsearch_version() == to_timestamp( + datetime(2023, 10, 20, 11, 4, 31) + ) + search_base = Search( + using=config.get_elasticsearch_client(), index=sg_db.latest_index_name + ) + assert search_base.count() == 8 + assert ( + search_base.filter("term", **{"parsed.file.k": "banana-4.jpg"}).count() + == 1 + ) + + @freeze_time("2023-10-20 11:04:31") + @pytest.mark.usefixtures("reset_mongo", "reset_elasticsearch") + def test_add_to_mongo_and_sync_to_elasticsearch_preparation(self, config: Config): + name = "preparation" + # before the frozen time + dump_date = date(2023, 10, 3) + # create an ecatalogue dump with 8 specimens and 8 preparations + ecat_records = [ + *[ + create_ecatalogue( + str(i), + EcatalogueType[name], + EntPreSpecimenRef=str(i + 8), + EntPreStorageMedium=f"Ethanol ({i}%)", + ) + for i in range(1, 9) + ], + *[ + create_ecatalogue( + str(i), + EcatalogueType.specimen, + MulMultiMediaRef=str(i), + CardParasiteRef=str(i), + EntCatBarcode=f"000-00-0-{i}", + ) + for i in range(9, 17) + ], + ] + create_dump(config.dumps_path, "ecatalogue", dump_date, *ecat_records) + # create an emultimedia dump with 8 images + create_dump( + config.dumps_path, + "emultimedia", + dump_date, + *[create_emultimedia(str(i), MulTitle=f"image {i}") for i in range(9, 17)], + ) + # create an etaxonomy dump with 8 records + create_dump( + config.dumps_path, + "etaxonomy", + dump_date, + *[create_etaxonomy(str(i), ClaOrder=f"order {i}") for i in range(9, 17)], + ) + + with DataImporter(config) as importer: + importer.queue_emu_changes() + + importer.add_to_mongo(name) + + sg_db = importer.sg_dbs[name] + assert sg_db.get_mongo_version() == to_timestamp( + datetime(2023, 10, 20, 11, 4, 31) + ) + assert sg_db.data_collection.count_documents({}) == 8 + + # having parallel=True seems to break in testing, maybe it's something to do + # with the test setup or something to do with pytest, who knows + importer.sync_to_elasticsearch(name, parallel=False) assert sg_db.get_elasticsearch_version() == to_timestamp( datetime(2023, 10, 20, 11, 4, 31) ) + search_base = Search( + using=config.get_elasticsearch_client(), index=sg_db.latest_index_name + ) + assert search_base.count() == 8 + assert ( + search_base.filter( + "term", **{"parsed.mediumType.k": "Ethanol (6%)"} + ).count() + == 1 + ) + # check a field that should have been copied from the voucher specimen + assert ( + search_base.filter( + "term", **{"parsed.barcode.k": "000-00-0-12"} + ).count() + == 1 + ) + # check a field that should have been copied from the voucher specimen's + # taxonomy assert ( - config.get_elasticsearch_client().count( - body={}, index=sg_db.latest_index_name - )["count"] - == count + search_base.filter("term", **{"parsed.order.k": "order 11"}).count() + == 1 ) def test_queue_changes_redactions(self, config: Config): @@ -234,7 +586,7 @@ def test_queue_changes_redactions(self, config: Config): importer.redaction_database.add_ids("ecatalogue", ["2", "3"], "reason_1") # queue all the change records - 
importer._queue_changes(changed_records, "ecatalogue") + importer.queue_changes(changed_records, "ecatalogue") assert "1" in importer.dbs["ecatalogue"] assert "2" not in importer.dbs["ecatalogue"] @@ -250,7 +602,7 @@ def test_redact_records(self, config: Config): ] # queue all the records - importer._queue_changes(records, "ecatalogue") + importer.queue_changes(records, "ecatalogue") # redact records 2 and 3 redacted_count = importer.redact_records("ecatalogue", ["2", "3"], "reason1") diff --git a/tests/test_links.py b/tests/test_links.py index efde964..6631bfa 100644 --- a/tests/test_links.py +++ b/tests/test_links.py @@ -2,7 +2,12 @@ from unittest.mock import MagicMock from dataimporter.dbs import DataDB -from dataimporter.links import MediaLink, TaxonomyLink, GBIFLink +from dataimporter.links import ( + MediaLink, + TaxonomyLink, + GBIFLink, + PreparationSpecimenLink, +) from dataimporter.model import SourceRecord from dataimporter.view import View @@ -480,3 +485,182 @@ def test_clear_from_foreign(self, tmp_path: Path): gbif_link.clear_from_foreign() assert gbif_link.gbif_id_map.size() == 0 + + +class TestPreparationSpecimenLink: + def test_update_from_base(self, tmp_path: Path): + base_view = View(tmp_path / "base_view", DataDB(tmp_path / "base_data")) + specimen_view = View( + tmp_path / "specimen_view", DataDB(tmp_path / "specimen_view") + ) + prep_link = PreparationSpecimenLink( + tmp_path / "prep_spec_link", base_view, specimen_view + ) + + base_records = [ + SourceRecord( + "p1", {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: "s1"}, "base" + ), + # this scenario is not expected, but sensible to check for it given EMu can + # do anything at any time + SourceRecord( + "p2", + {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: ("s2", "s3")}, + "base", + ), + SourceRecord("p3", {"not_the_field": "s4"}, "base"), + SourceRecord( + "p4", {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: "s1"}, "base" + ), + ] + + prep_link.update_from_base(base_records) + + assert prep_link.id_map.get_one("p1") == "s1" + assert prep_link.id_map.get_one("p2") == "s2" + assert prep_link.id_map.get_one("p3") is None + assert prep_link.id_map.get_one("p4") == "s1" + + def test_update_from_foreign(self, tmp_path: Path): + base_view = View(tmp_path / "base_view", DataDB(tmp_path / "base_data")) + specimen_view = View( + tmp_path / "specimen_view", DataDB(tmp_path / "specimen_view") + ) + prep_link = PreparationSpecimenLink( + tmp_path / "prep_spec_link", base_view, specimen_view + ) + + base_records = [ + SourceRecord( + "p1", {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: "s1"}, "base" + ), + # this scenario is not expected, but sensible to check for it given EMu can + # do anything at any time + SourceRecord( + "p2", + {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: ("s2", "s3")}, + "base", + ), + SourceRecord("p3", {"not_the_field": "s4"}, "base"), + SourceRecord( + "p4", {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: "s1"}, "base" + ), + ] + base_view.db.put_many(base_records) + prep_link.update_from_base(base_records) + + specimen_records = [ + SourceRecord("s1", {"x": "1"}, "specimen"), + SourceRecord("s2", {"x": "2"}, "specimen"), + SourceRecord("s3", {"x": "3"}, "specimen"), + SourceRecord("s4", {"x": "4"}, "specimen"), + ] + + # replace the queue method on the base view with a mock + base_view.queue = MagicMock() + + prep_link.update_from_foreign(specimen_records) + + queued_base_records = base_view.queue.call_args.args[0] + assert len(queued_base_records) == 3 + # p1 + assert base_records[0] in 
queued_base_records
+        # p2
+        assert base_records[1] in queued_base_records
+        # p4
+        assert base_records[3] in queued_base_records
+
+    def test_transform_missing(self, tmp_path: Path):
+        base_view = View(tmp_path / "base_view", DataDB(tmp_path / "base_data"))
+        specimen_view = View(
+            tmp_path / "specimen_view", DataDB(tmp_path / "specimen_view")
+        )
+        prep_link = PreparationSpecimenLink(
+            tmp_path / "prep_spec_link", base_view, specimen_view
+        )
+
+        base_record = SourceRecord(
+            "p1", {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: "s1"}, "base"
+        )
+        prep_link.update_from_base([base_record])
+        data = {"beans": "always"}
+        prep_link.transform(base_record, data)
+
+        assert data == {"beans": "always"}
+
+    def test_transform(self, tmp_path: Path):
+        base_view = View(tmp_path / "base_view", DataDB(tmp_path / "base_data"))
+        specimen_view = View(
+            tmp_path / "specimen_view", DataDB(tmp_path / "specimen_view")
+        )
+        prep_link = PreparationSpecimenLink(
+            tmp_path / "prep_spec_link", base_view, specimen_view
+        )
+
+        base_record = SourceRecord(
+            "p1", {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: "s1"}, "base"
+        )
+        prep_link.update_from_base([base_record])
+
+        mapped_field_data = {
+            field: f"{field} data"
+            for field in PreparationSpecimenLink.MAPPED_SPECIMEN_FIELDS
+        }
+        # set one of the fields to None
+        mapped_none_test_field = PreparationSpecimenLink.MAPPED_SPECIMEN_FIELDS[0]
+        mapped_field_data[mapped_none_test_field] = None
+        specimen_record = SourceRecord(
+            "s1",
+            {
+                "occurrenceID": "5",
+                "_id": "8",
+                "an_additional_field": "some value which shouldn't be copied over",
+                **mapped_field_data,
+            },
+            "specimen",
+        )
+        specimen_view.db.put_many([specimen_record])
+
+        data = {"x": "3", "z": "9"}
+        prep_link.transform(base_record, data)
+
+        assert mapped_none_test_field not in data
+        del mapped_field_data[mapped_none_test_field]
+        assert data == {
+            "x": "3",
+            "z": "9",
+            "associatedOccurrences": "Voucher: 5",
+            "specimenID": "8",
+            **mapped_field_data,
+        }
+
+    def test_clear_from_base(self, tmp_path: Path):
+        base_view = View(tmp_path / "base_view", DataDB(tmp_path / "base_data"))
+        specimen_view = View(
+            tmp_path / "specimen_view", DataDB(tmp_path / "specimen_view")
+        )
+        prep_link = PreparationSpecimenLink(
+            tmp_path / "prep_spec_link", base_view, specimen_view
+        )
+
+        base_records = [
+            SourceRecord(
+                "p1", {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: "s1"}, "base"
+            ),
+            SourceRecord(
+                "p2",
+                {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: ("s2", "s3")},
+                "base",
+            ),
+            SourceRecord("p3", {"not_the_field": "s4"}, "base"),
+            SourceRecord(
+                "p4", {PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD: "s1"}, "base"
+            ),
+        ]
+        base_view.db.put_many(base_records)
+        prep_link.update_from_base(base_records)
+        assert prep_link.id_map.size() > 0
+
+        prep_link.clear_from_base()
+
+        assert prep_link.id_map.size() == 0
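
Reviewer note (illustration only, not part of the diff): the field-copy behaviour that PreparationSpecimenLink.transform introduces can be sketched in isolation as below. The helper name merge_voucher_fields and the sample dicts are made up for illustration; the field list mirrors MAPPED_SPECIMEN_FIELDS in links.py, and the renaming of occurrenceID/_id follows the transform implementation above.

# Minimal standalone sketch of the merge performed by PreparationSpecimenLink.transform()
# on an already-transformed specimen dict (assumed names, not part of the importer API).
MAPPED_SPECIMEN_FIELDS = [
    "barcode",
    "scientificName",
    "order",
    "identifiedBy",
    "locality",
    "decimalLatitude",
    "decimalLongitude",
]


def merge_voucher_fields(prep_data: dict, specimen: dict) -> dict:
    """Copy searchable voucher specimen fields onto a preparation data dict."""
    merged = dict(prep_data)
    # the specimen's occurrenceID and _id are renamed rather than copied verbatim
    merged["associatedOccurrences"] = f"Voucher: {specimen.pop('occurrenceID')}"
    merged["specimenID"] = specimen.pop("_id")
    # only copy mapped fields that actually have a value on the specimen
    merged.update(
        (field, value)
        for field in MAPPED_SPECIMEN_FIELDS
        if (value := specimen.get(field)) is not None
    )
    return merged


prep = {"_id": "p1", "preparationType": "DNA Extract"}
voucher = {"_id": "s1", "occurrenceID": "abc-123", "barcode": "000-00-0-1", "order": None}
print(merge_voucher_fields(prep, voucher))
# -> {'_id': 'p1', 'preparationType': 'DNA Extract',
#     'associatedOccurrences': 'Voucher: abc-123', 'specimenID': 's1',
#     'barcode': '000-00-0-1'}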