From 663e4474318c02f2e84c1d03b92fdb1b12bfaf02 Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Thu, 12 Dec 2024 16:10:47 +0800 Subject: [PATCH 1/8] Make test pass data as file-like object to apply_validation This matches how this method is used in real code. --- tests/templates/test_validation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/templates/test_validation.py b/tests/templates/test_validation.py index 016e42e1..8acabfa3 100644 --- a/tests/templates/test_validation.py +++ b/tests/templates/test_validation.py @@ -15,15 +15,16 @@ ) def test_apply_validation(template_id: str, test_params: conftest.MappingParameters) -> None: """Tests the validation for the template""" - # Load Data - data = test_params.data.read_bytes() - # Get Mapper mapper = abis_mapping.get_mapper(template_id) assert mapper - # Validate - report = mapper().apply_validation(data) + # Load Data + with open(test_params.data, "rb") as data: + # Validate + report = mapper().apply_validation(data) + + # Assert validation result assert report.valid == test_params.should_validate # Validate errors if invalid expected (and supplied). 
if not report.valid: From 9157d35c31817985a73be9e29be3c94c103ecddb Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Thu, 12 Dec 2024 16:12:14 +0800 Subject: [PATCH 2/8] BDRSPS-1109 Make surveyID a required field, and allow multiple rows --- .../survey_metadata_v3/examples/minimal.csv | 1 + .../survey_metadata_v3/examples/minimal.ttl | 77 ++++++++++++++++--- .../examples/minimal_error_too_many_rows.csv | 3 - .../templates/survey_metadata_v3/mapping.py | 32 ++++---- .../templates/survey_metadata_v3/schema.json | 5 +- .../templates/instructions.md | 11 ++- abis_mapping/utils/iri_patterns.py | 2 + tests/templates/conftest.py | 7 -- 8 files changed, 96 insertions(+), 42 deletions(-) delete mode 100644 abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv diff --git a/abis_mapping/templates/survey_metadata_v3/examples/minimal.csv b/abis_mapping/templates/survey_metadata_v3/examples/minimal.csv index a3e93b7e..104c432f 100644 --- a/abis_mapping/templates/survey_metadata_v3/examples/minimal.csv +++ b/abis_mapping/templates/survey_metadata_v3/examples/minimal.csv @@ -1,2 +1,3 @@ surveyID,surveyName,surveyPurpose,surveyType,surveyStart,surveyEnd,targetTaxonomicScope,targetHabitatScope,spatialCoverageWKT,geodeticDatum,surveyOrgs,surveyMethodCitation,surveyMethodDescription,surveyMethodURL,keywords COL1,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer",Summer sampling for peak insect diversity.,Wet pitfall trapping,21/01/2015,3/02/2015,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",GDA2020,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). 
https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study +COL2,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Winter",Winter sampling for peak insect diversity.,Wet pitfall trapping,21/06/2015,3/07/2015,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",GDA2020,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). 
https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study diff --git a/abis_mapping/templates/survey_metadata_v3/examples/minimal.ttl b/abis_mapping/templates/survey_metadata_v3/examples/minimal.ttl index 06ac4275..d72779ec 100644 --- a/abis_mapping/templates/survey_metadata_v3/examples/minimal.ttl +++ b/abis_mapping/templates/survey_metadata_v3/examples/minimal.ttl @@ -12,25 +12,29 @@ a schema:Collection ; schema:isPartOf ; - schema:member ; + schema:member , + ; schema:name "Survey Collection - Survey Type - Wet pitfall trapping" ; tern:hasAttribute . a schema:Collection ; schema:isPartOf ; - schema:member ; + schema:member , + ; schema:name "Survey Collection - Target Habitat Scope - Woodland" ; tern:hasAttribute . 
a schema:Collection ; schema:isPartOf ; - schema:member ; + schema:member , + ; schema:name "Survey Collection - Target Taxonomic Scope - Coleoptera" ; tern:hasAttribute . a schema:Collection ; schema:isPartOf ; - schema:member ; + schema:member , + ; schema:name "Survey Collection - Target Taxonomic Scope - Insecta" ; tern:hasAttribute . @@ -40,15 +44,27 @@ schema:isPartOf ; schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . + a abis:Project ; + schema:hasPart ; + schema:identifier "COL2" ; + schema:isPartOf ; + schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Winter" . + a rdfs:Datatype ; skos:prefLabel "surveyID source" ; prov:qualifiedAttribution [ a prov:Attribution ; + prov:agent ; + prov:hadRole ], + [ a prov:Attribution ; prov:agent ; prov:hadRole ] . a rdfs:Datatype ; skos:prefLabel "surveyID source" ; prov:qualifiedAttribution [ a prov:Attribution ; + prov:agent ; + prov:hadRole ], + [ a prov:Attribution ; prov:agent ; prov:hadRole ] . @@ -110,12 +126,6 @@ rdfs:label "Insecta" ; rdf:value . - a prov:Agent ; - schema:name "CSIRO" . - - a prov:Agent ; - schema:name "NSW Department of Planning, Industry and Environment" . - a tern:Survey ; bdr:purpose "Summer sampling for peak insect diversity." ; bdr:target "Coleoptera", @@ -139,6 +149,35 @@ "woodland" ; schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . + a tern:Survey ; + bdr:purpose "Winter sampling for peak insect diversity." 
; + bdr:target "Coleoptera", + "Insecta" ; + geo:hasGeometry _:N27fdbd9c9077333e51ac0d0600000001 ; + time:hasTime [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2015-06-21"^^xsd:date ] ; + time:hasEnd [ a time:Instant ; + time:inXSDDate "2015-07-03"^^xsd:date ] ] ; + prov:hadPlan ; + schema:identifier "COL2"^^, + "COL2"^^ ; + schema:keywords "farmland", + "ground beetle", + "habitat", + "morphology", + "remnant vegetation", + "split-plot study", + "traits", + "woodland" ; + schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Winter" . + + a prov:Agent ; + schema:name "CSIRO" . + + a prov:Agent ; + schema:name "NSW Department of Planning, Industry and Environment" . + a prov:Plan ; schema:citation "Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9\"" ; schema:description "Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location." 
; @@ -146,6 +185,13 @@ schema:url "https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf"^^xsd:anyURI, "https://doi.org/10.1002/9781118945568.ch11"^^xsd:anyURI . + a prov:Plan ; + schema:citation "Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9\"" ; + schema:description "Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location." ; + schema:isPartOf ; + schema:url "https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf"^^xsd:anyURI, + "https://doi.org/10.1002/9781118945568.ch11"^^xsd:anyURI . + a tern:Dataset . [] a rdf:Statement ; @@ -156,6 +202,17 @@ rdf:subject ; rdfs:comment "supplied as" . 
+[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POLYGON ((-33.826 146.363, -33.826 148.499, -34.411 148.499, -33.826 146.363))"^^geo:wktLiteral ] ; + rdf:object _:N27fdbd9c9077333e51ac0d0600000001 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + _:N27fdbd9c9077333e51ac0d0600000000 a geo:Geometry ; geo:asWKT " POLYGON ((-33.826 146.363, -33.826 148.499, -34.411 148.499, -33.826 146.363))"^^geo:wktLiteral . +_:N27fdbd9c9077333e51ac0d0600000001 a geo:Geometry ; + geo:asWKT " POLYGON ((-33.826 146.363, -33.826 148.499, -34.411 148.499, -33.826 146.363))"^^geo:wktLiteral . + diff --git a/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv b/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv deleted file mode 100644 index 6dbb5c7f..00000000 --- a/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv +++ /dev/null @@ -1,3 +0,0 @@ -surveyID,surveyName,surveyPurpose,surveyType,surveyStart,surveyEnd,targetTaxonomicScope,targetHabitatScope,spatialCoverageWKT,geodeticDatum,surveyOrgs,surveyMethodCitation,surveyMethodDescription,surveyMethodURL,keywords -COL1,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer",Summer sampling for peak insect diversity.,Wet pitfall trapping,21/01/2015,3/02/2015,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",GDA2020,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). 
https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study -COL2,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer",Summer sampling for peak insect diversity.,Wet pitfall trapping,21/01/2015,3/02/2015,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",GDA2020,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). 
https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study diff --git a/abis_mapping/templates/survey_metadata_v3/mapping.py b/abis_mapping/templates/survey_metadata_v3/mapping.py index a9861be1..1542e5d2 100644 --- a/abis_mapping/templates/survey_metadata_v3/mapping.py +++ b/abis_mapping/templates/survey_metadata_v3/mapping.py @@ -79,10 +79,9 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric report: frictionless.Report = resource.validate( checklist=frictionless.Checklist( checks=[ - # Enforces non-empty and maximum row count. 
- frictionless.checks.table_dimensions(max_rows=1, min_rows=1), # Extra Custom Checks plugins.tabular.IsTabular(), + plugins.empty.NotEmpty(), plugins.chronological.ChronologicalOrder( field_names=[ "surveyStart", @@ -128,14 +127,14 @@ def apply_mapping_row( project = utils.rdf.uri(f"project/SSD-Survey-Project/{row_num}", base_iri) # Create TERN survey IRI from surveyID field - survey_id: str | None = row["surveyID"] + survey_id: str = row["surveyID"] survey = utils.iri_patterns.survey_iri(base_iri, survey_id) # Create survey plan IRI survey_plan = utils.iri_patterns.plan_iri( base_iri, "survey", - (survey_id or str(row_num)), # fallback to row number when surveyID not available. + survey_id, ) # Conditionally create survey type attribute, value and collection IRIs @@ -362,7 +361,7 @@ def add_project( row (frictionless.Row): Row to be processed in dataset. """ # Extract relevant values from row - project_id = row["surveyID"] + project_id: str = row["surveyID"] project_name = row["surveyName"] # Add type and attach to dataset @@ -371,8 +370,7 @@ def add_project( # Add (required) project name, id (not required) and purpose (not required). 
graph.add((uri, rdflib.SDO.name, rdflib.Literal(project_name))) - if project_id: - graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(project_id))) + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(project_id))) # Attach survey graph.add((uri, rdflib.SDO.hasPart, survey)) @@ -402,16 +400,16 @@ def add_survey( graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["surveyName"]))) # Add survey ID - if (survey_id := row["surveyID"]) is not None: - # Add survey id literals per organisation - for survey_org in survey_org_objects: - id_literal = rdflib.Literal(lexical_or_value=survey_id, datatype=survey_org.datatype) - graph.add((uri, rdflib.SDO.identifier, id_literal)) - - # Add survey id as type string if no organisation provided - if len(survey_org_objects) == 0: - id_literal = rdflib.Literal(survey_id) - graph.add((uri, rdflib.SDO.identifier, id_literal)) + survey_id: str = row["surveyID"] + # Add survey id literals per organisation + for survey_org in survey_org_objects: + id_literal = rdflib.Literal(lexical_or_value=survey_id, datatype=survey_org.datatype) + graph.add((uri, rdflib.SDO.identifier, id_literal)) + + # Add survey id as type string if no organisation provided + if len(survey_org_objects) == 0: + id_literal = rdflib.Literal(survey_id) + graph.add((uri, rdflib.SDO.identifier, id_literal)) # Add taxonomic coverage if taxonomic_coverage := row["targetTaxonomicScope"]: diff --git a/abis_mapping/templates/survey_metadata_v3/schema.json b/abis_mapping/templates/survey_metadata_v3/schema.json index ae4682fb..b4a3a6ab 100644 --- a/abis_mapping/templates/survey_metadata_v3/schema.json +++ b/abis_mapping/templates/survey_metadata_v3/schema.json @@ -3,12 +3,13 @@ { "name": "surveyID", "title": "Survey ID", - "description": "The identifier for the survey. Important if more there is more than one survey a the project.", + "description": "The identifier for the survey. 
Important if there is more than one survey in the project.", "example": "COL1", "type": "string", "format": "default", "constraints": { - "required": false + "required": true, + "unique": true } }, { diff --git a/abis_mapping/templates/survey_metadata_v3/templates/instructions.md b/abis_mapping/templates/survey_metadata_v3/templates/instructions.md index 9411cdba..ad50ce2a 100644 --- a/abis_mapping/templates/survey_metadata_v3/templates/instructions.md +++ b/abis_mapping/templates/survey_metadata_v3/templates/instructions.md @@ -24,8 +24,6 @@ For data validation, you will need your data file to: - comply with all **data value constraints**; for example the geographic coordinates are consistent with a [geodeticDatum](#geodeticDatum-vocabularies) type of the ***{{values.geodetic_datum_count}}*** available options. -- only **one row of metadata** should be included and only the first row of metadata will be accepted - (this symbolises one Survey per dataset submission). Additional fields may be added **after the templated fields** (noting that the data type is not assumed and values will be encoded as strings). @@ -94,7 +92,14 @@ datatype format, and examples. ## CHANGELOG -No changes from Systematic Survey Metadata Template v2.0.0 +Changes from Systematic Survey Metadata Template v2.0.0 + +* This template now accepts multiple rows of data, to represent multiple Surveys in a Dataset. + +### CHANGED FIELDS + +* Because multiple rows are now allowed, [`surveyID`](#surveyID-field) is now a **mandatory** field, + and each row must have a **unique** value within the template, in order to identify each row. ## APPENDICES ### APPENDIX-I: Vocabulary List diff --git a/abis_mapping/utils/iri_patterns.py b/abis_mapping/utils/iri_patterns.py index 37c7988a..9ce5e719 100644 --- a/abis_mapping/utils/iri_patterns.py +++ b/abis_mapping/utils/iri_patterns.py @@ -35,6 +35,8 @@ def survey_iri( "Survey/{survey_id}", # surveyID is an optional field. 
When missing fallback to the row number. # (which is always 1 for a Survey, since metadata template must have 1 row) + # TODO remove this fallback once SSD v2 templates are removed, + # since surveyID is a required field from v3+ survey_id=(survey_id or "1"), ) diff --git a/tests/templates/conftest.py b/tests/templates/conftest.py index 94093341..1284e90c 100644 --- a/tests/templates/conftest.py +++ b/tests/templates/conftest.py @@ -242,13 +242,6 @@ class TemplateTestParameters: ), expected=None, ), - MappingParameters( - scenario_name="too_many_rows", - should_validate=False, - expected_error_codes={"table-dimensions"}, - data=pathlib.Path("abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv"), - expected=None, - ), MappingParameters( scenario_name="mutually-inclusive-field-missing", should_validate=False, From 220e78806ff61250c7ecd3ab2c39d7b33a4a9162 Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Fri, 13 Dec 2024 12:17:32 +0800 Subject: [PATCH 3/8] BDRSPS-1109 Add plugin for validating surveyID matches the metadata template --- abis_mapping/plugins/__init__.py | 1 + abis_mapping/plugins/survey_id_validation.py | 49 +++++++++++++ tests/plugins/test_survey_id_validation.py | 73 ++++++++++++++++++++ 3 files changed, 123 insertions(+) create mode 100644 abis_mapping/plugins/survey_id_validation.py create mode 100644 tests/plugins/test_survey_id_validation.py diff --git a/abis_mapping/plugins/__init__.py b/abis_mapping/plugins/__init__.py index f616fdd0..5cdca4d2 100644 --- a/abis_mapping/plugins/__init__.py +++ b/abis_mapping/plugins/__init__.py @@ -15,6 +15,7 @@ from . import required from . import sites_geometry from . import string_customized +from . import survey_id_validation from . import tabular from . import timestamp from . 
import wkt diff --git a/abis_mapping/plugins/survey_id_validation.py b/abis_mapping/plugins/survey_id_validation.py new file mode 100644 index 00000000..ba014f93 --- /dev/null +++ b/abis_mapping/plugins/survey_id_validation.py @@ -0,0 +1,49 @@ +# Third-Party +import attrs +import frictionless +import frictionless.errors + +# Typing +from collections.abc import Iterator, Set + + +@attrs.define(kw_only=True, repr=False) +class SurveyIDValidation(frictionless.Check): + """Validates that the surveyID field, if provided, is valid. + + Attributes: + valid_survey_ids: surveyIDs from the metadata template. + """ + + # Check Attributes + type = "survey-id-validation" + Errors = [frictionless.errors.RowConstraintError] + + # Attributes specific to this check + valid_survey_ids: Set[str] + + def validate_row(self, row: frictionless.Row) -> Iterator[frictionless.Error]: + """Called to validate the given row (on every row). + + Args: + row (frictionless.Row): The row to check. + + Yields: + frictionless.Error: Any errors detected. + """ + # Get surveyID value, which can be None + survey_id: str | None = row["surveyID"] + # If you want surveyID to be required, use "required": true in conjunction + # with this check. 
+ + # If None, don't check anything + if survey_id is None: + return + + # Otherwise check that surveyID is one of the valid values + if survey_id not in self.valid_survey_ids: + yield frictionless.errors.ConstraintError.from_row( + row=row, + note="surveyID must match a surveyID in the survey_metadata template", + field_name="surveyID", + ) diff --git a/tests/plugins/test_survey_id_validation.py b/tests/plugins/test_survey_id_validation.py new file mode 100644 index 00000000..b6504f31 --- /dev/null +++ b/tests/plugins/test_survey_id_validation.py @@ -0,0 +1,73 @@ +"""Provides Unit Tests for the `abis_mapping.plugins.survey_id_validation` module""" + +# Third-Party +import frictionless + +# Local +from abis_mapping import plugins + + +def test_survey_id_validation_valid() -> None: + """Tests the SurveyIDValidation Checker with valid data""" + # Construct Fake Resource + resource = frictionless.Resource( + source=[ + {"rowID": "1", "surveyID": "S1"}, + {"rowID": "2", "surveyID": "S2"}, + {"rowID": "3", "surveyID": None}, + ], + ) + + # Validate + report: frictionless.Report = resource.validate( + checklist=frictionless.Checklist( + checks=[ + plugins.survey_id_validation.SurveyIDValidation( + valid_survey_ids={"S1", "S2", "S3"}, + ), + ], + ), + ) + + # Check + assert report.valid + + +def test_survey_id_validation_invalid() -> None: + """Tests the SurveyIDValidation Checker with invalid data""" + # Construct Fake Resource + resource = frictionless.Resource( + source=[ + {"rowID": "1", "surveyID": "S1"}, + {"rowID": "2", "surveyID": "NOT_A_SURVEY"}, # invalid + {"rowID": "3", "surveyID": "S2"}, + {"rowID": "4", "surveyID": None}, + {"rowID": "5", "surveyID": "ALSO_NOT_A_SURVEY"}, # invalid + ], + ) + + # Validate + report: frictionless.Report = resource.validate( + checklist=frictionless.Checklist( + checks=[ + plugins.survey_id_validation.SurveyIDValidation( + valid_survey_ids={"S1", "S2", "S3"}, + ), + ], + ), + ) + + # Check + assert not report.valid + assert 
len(report.tasks) == 1 + assert len(report.tasks[0].errors) == 2 + assert report.tasks[0].errors[0].message == ( + 'The cell "NOT_A_SURVEY" in row at position "3" and field ' + '"surveyID" at position "2" does not conform to a constraint: ' + "surveyID must match a surveyID in the survey_metadata template" + ) + assert report.tasks[0].errors[1].message == ( + 'The cell "ALSO_NOT_A_SURVEY" in row at position "6" and field ' + '"surveyID" at position "2" does not conform to a constraint: ' + "surveyID must match a surveyID in the survey_metadata template" + ) From ace355465a923ad125d3fb1be058401374901cad Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Fri, 13 Dec 2024 12:57:23 +0800 Subject: [PATCH 4/8] BDRSPS-1109 Method to extract survey IDs for cross-validation --- .../templates/survey_metadata_v3/mapping.py | 36 ++++++++++++++++++- tests/templates/test_survey_metadata_v3.py | 13 +++++++ 2 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tests/templates/test_survey_metadata_v3.py diff --git a/abis_mapping/templates/survey_metadata_v3/mapping.py b/abis_mapping/templates/survey_metadata_v3/mapping.py index 1542e5d2..f17ee45b 100644 --- a/abis_mapping/templates/survey_metadata_v3/mapping.py +++ b/abis_mapping/templates/survey_metadata_v3/mapping.py @@ -15,7 +15,7 @@ from abis_mapping import utils # Typing -from typing import Any +from typing import Any, Literal # Constants / shortcuts @@ -101,6 +101,40 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric # Return validation report return report + def extract_survey_id_set( + self, + data: base.types.ReadableType, + ) -> dict[str, Literal[True]]: + """Extract surveyID values from the template + + Args: + data (base.types.ReadableType): Raw data. + + Returns: + The set of surveyID values, as a dict. 
+ """ + # Construct schema + schema = frictionless.Schema.from_descriptor(self.schema()) + + # Construct resource + resource = frictionless.Resource( + source=data, + format="csv", + schema=schema, + encoding="utf-8", + ) + + survey_ids: dict[str, Literal[True]] = {} + + # Iterate over rows to extract values + with resource.open() as r: + for row in r.row_stream: + survey_id: str | None = row["surveyID"] + if survey_id: + survey_ids[survey_id] = True + + return survey_ids + def apply_mapping_row( self, *, diff --git a/tests/templates/test_survey_metadata_v3.py b/tests/templates/test_survey_metadata_v3.py new file mode 100644 index 00000000..6aa4487e --- /dev/null +++ b/tests/templates/test_survey_metadata_v3.py @@ -0,0 +1,13 @@ +import pathlib + +import abis_mapping.templates.survey_metadata_v3.mapping + + +def test_extract_survey_id_set() -> None: + """Test the extract_survey_id_set method on the Survey metadata mapper""" + mapper = abis_mapping.templates.survey_metadata_v3.mapping.SurveyMetadataMapper() + + with pathlib.Path("abis_mapping/templates/survey_metadata_v3/examples/minimal.csv").open("rb") as data: + result = mapper.extract_survey_id_set(data) + + assert result == {"COL1": True, "COL2": True} From b7c883d67c0649437f8b35e721a19e57ba420c7a Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Fri, 13 Dec 2024 13:11:16 +0800 Subject: [PATCH 5/8] Add test files for new templates, copied from previous version --- .../test_survey_occurrence_data_v3.py | 565 ++++++++++++++++++ tests/templates/test_survey_site_data_v3.py | 256 ++++++++ .../test_survey_site_visit_data_v3.py | 273 +++++++++ 3 files changed, 1094 insertions(+) create mode 100644 tests/templates/test_survey_occurrence_data_v3.py create mode 100644 tests/templates/test_survey_site_data_v3.py create mode 100644 tests/templates/test_survey_site_visit_data_v3.py diff --git a/tests/templates/test_survey_occurrence_data_v3.py b/tests/templates/test_survey_occurrence_data_v3.py new file mode 100644 index 
00000000..ef0fca6b --- /dev/null +++ b/tests/templates/test_survey_occurrence_data_v3.py @@ -0,0 +1,565 @@ +"""Tests for specific for the `survey_occurrence_data` template.""" + +# Standard +import csv +import io +import pathlib + +# Third-party +import attrs +import pandas as pd +import pytest +import pytest_mock +import rdflib + +# Local +from abis_mapping import base +from abis_mapping import models +import abis_mapping.templates.survey_occurrence_data_v3.mapping +from tests import conftest +import tests.helpers + +# Typing +from typing import Iterable + + +# Alias mapper +Mapper = abis_mapping.templates.survey_occurrence_data_v3.mapping.SurveyOccurrenceMapper + +# Convenient shortcut +a = rdflib.RDF.type + + +@pytest.fixture +def mapper() -> Iterable[Mapper]: + """Fixture to provide a mapper instance for tests.""" + # Clear schema lru cache prior to creating instance + Mapper.schema.cache_clear() + + # Yield + yield Mapper() + + # Clear schema lru cache after running test + Mapper.schema.cache_clear() + + +def test_extract_site_visit_id_keys(mocker: pytest_mock.MockerFixture, mapper: Mapper) -> None: + """Test the extract_site_visit_id_keys method. + + Args: + mocker (pytest_mock.MockerFixture): The mocker fixture. + mapper (Mapper): The mapper object fixture. 
+ """ + # Modify schema to only include the necessary fields + descriptor = {"fields": [{"name": "siteVisitID", "type": "string"}]} + mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor + + # Create raw data csv string + csv_data = b"\r\n".join( + [ + b"siteVisitID", + b"S1", + b"S2", + b"", + b"S3", + b"S2", + b"S1", + ] + ) + + expected = { + "S1": True, + "S2": True, + "S3": True, + } + + # Invoke method + actual = mapper.extract_site_visit_id_keys(csv_data) + + # Validate + assert actual == expected + + +class TestDefaultGeometryMap: + @attrs.define(kw_only=True) + class Scenario: + """Dataclass to hold the scenario parameters.""" + + name: str + raws: list[list[str]] + expected_error_codes: set[str] = set() + default_map: dict[str, str] + + # List of scenarios for the apply_validation method tests + scenarios: list[Scenario] = [ + Scenario( + name="valid_with_default_map", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["site1", "", "", "", "", "", "", ""], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["site3", "-38.94", "115.21", "AGD66", "", "", "", ""], + ["site4", "-38.94", "115.21", "EPSG:4202", "", "", "", ""], + ], + default_map={"site1": "something"}, + ), + Scenario( + name="invalid_missing_from_default_map", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["site1", "", "", "", "", "", "", ""], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={"site3": "something"}, + expected_error_codes={"row-constraint"}, + ), + Scenario( + name="invalid_survey_occurrence_requires_latlong", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["", "", "", "", "", "", "VU", "VIC"], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={}, + expected_error_codes={"row-constraint"}, + ), + Scenario( + name="valid_survey_occurrence_requires_latlong", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + 
["", "-38.94", "115.21", "WGS84", "", "", "VU", "VIC"], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + # The following show that non-url safe characters get encoded during mapping. + ["site a", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["site/b", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["site%20c", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={}, + ), + Scenario( + name="invalid_missing_long", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["site1", "-38.94", "", "WGS84", "", "", "", ""], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={"site1": "something"}, + expected_error_codes={"row-constraint"}, + ), + Scenario( + name="invalid_missing_lat", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["site1", "", "115.21", "WGS84", "", "", "", ""], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={"site1": "something"}, + expected_error_codes={"row-constraint"}, + ), + Scenario( + name="invalid_survey_occurrence_missing_lat", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["", "", "115.21", "WGS84", "", "", "", ""], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={}, + expected_error_codes={"row-constraint"}, + ), + Scenario( + name="invalid_survey_occurrence_missing_long", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["", "-38.94", "", "WGS84", "", "", "", ""], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={}, + expected_error_codes={"row-constraint"}, + ), + Scenario( + name="invalid_missing_geodetic_datum", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["site1", "-38.94", "115.21", "", "", "", "", ""], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={"site1": "something"}, + expected_error_codes={"row-constraint"}, + ), + Scenario( + 
name="invalid_survey_occurrence_missing_geodetic_datum", + raws=[ + ["site1", "-38.94", "115.21", "WGS84", "", "", "", ""], + ["", "-38.94", "115.21", "", "", "", "", ""], + ["site2", "-38.94", "115.21", "WGS84", "", "", "", ""], + ], + default_map={}, + expected_error_codes={"row-constraint"}, + ), + ] + + @pytest.mark.parametrize( + argnames="scenario", + argvalues=scenarios, + ids=(scenario.name for scenario in scenarios), + ) + def test_apply_validation(self, scenario: Scenario, mocker: pytest_mock.MockerFixture, mapper: Mapper) -> None: + """Tests the `apply_validation` method with a supplied default map. + + Args: + scenario (Scenario): The parameters of the scenario under test. + mocker (pytest_mock.MockerFixture): The mocker fixture. + mapper (Mapper): Mapper instance fixture. + """ + # Construct fake data + rawh = [ + "siteID", + "decimalLatitude", + "decimalLongitude", + "geodeticDatum", + "organismQuantity", + "organismQuantityType", + "threatStatus", + "conservationAuthority", + ] + all_raw = [{hname: val for hname, val in zip(rawh, ln, strict=True)} for ln in scenario.raws] + + # Modify schema to only fields required for test + original_fields = mapper.schema()["fields"] + assert set(rawh) - {f["name"] for f in original_fields} == set() + descriptor = {"fields": [field for field in original_fields if field["name"] in rawh]} + descriptor["fields"].sort(key=lambda f: rawh.index(f["name"])) + + # Patch the schema for the test + mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor + + # Create raw data csv string + output = io.StringIO() + csv_writer = csv.DictWriter(output, fieldnames=rawh) + csv_writer.writeheader() + for row in all_raw: + csv_writer.writerow(row) + csv_data = output.getvalue().encode("utf-8") + + # Apply validation + report = mapper.apply_validation( + data=csv_data, + site_id_geometry_map=scenario.default_map, + ) + + # Assert + assert report.valid == (scenario.expected_error_codes == set()) + if not 
report.valid: + error_codes = [code for codes in report.flatten(["type"]) for code in codes] + assert set(error_codes) == scenario.expected_error_codes + + def test_apply_mapping(self, mapper: Mapper) -> None: + """Tests apply_mapping method with default geometry map. + + Args: + mapper (Mapper): Mapper instance fixture. + """ + # Build a dataframe from an existing csv + df = pd.read_csv("abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.csv") + + # Modify and preserve first entry + col_names = ["decimalLongitude", "decimalLatitude", "geodeticDatum"] + s_geo_vals = df[[*col_names, "siteID"]].iloc[0] + df.loc[0] = df.loc[0].drop(col_names) + + # Proving the values null for first row + assert df[col_names].loc[0].isna().all() + + # Write out to memory + output = io.StringIO() + # Write dataframe to memory as csv + df.to_csv(output, index=False) + + # Assign csv data to variable + csv_data = output.getvalue().encode("utf-8") + + expected = pathlib.Path( + "abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl" + ).read_text() + + # Resulting graph doesn't match expected when no lat/long provided + graphs = list( + mapper.apply_mapping( + data=csv_data, + chunk_size=None, + dataset_iri=tests.helpers.TEST_DATASET_IRI, + base_iri=tests.helpers.TEST_BASE_NAMESPACE, + ) + ) + assert len(graphs) == 1 + assert not conftest.compare_graphs(graphs[0], expected) + + # Make site id geo default map using values extracted previously + val = str( + models.spatial.Geometry( + raw=models.spatial.LatLong(s_geo_vals["decimalLatitude"], s_geo_vals["decimalLongitude"]), + datum=s_geo_vals["geodeticDatum"], + ).to_rdf_literal() + ) + default_map = {s_geo_vals["siteID"]: val} + + # Create graph + graphs = list( + mapper.apply_mapping( + data=csv_data, + chunk_size=None, + dataset_iri=tests.helpers.TEST_DATASET_IRI, + base_iri=tests.helpers.TEST_BASE_NAMESPACE, + site_id_geometry_map=default_map, + ) + ) + assert len(graphs) == 1 + + # Now with the 
provided default map values the graph should match. + assert conftest.compare_graphs(graphs[0], expected) + assert "None" not in graphs[0].serialize(format="ttl") + + +class TestDefaultTemporalMap: + """Tests specific to the provision of a default temporal map.""" + + @attrs.define(kw_only=True) + class Scenario: + """Dataclass to hold the scenario parameters.""" + + name: str + raws: list[list[str]] + expected_error_codes: set[str] = set() + default_map: dict[str, str] + + scenarios: list[Scenario] = [ + Scenario( + name="valid_with_default_map", + raws=[ + ["SV1", "S1", "2024-10-16"], + ["SV2", "S1", ""], + ["SV3", "S1", "2024-10-16T15:15:15+0800"], + ["SV4", "S1", ""], + ], + default_map={"SV2": "some rdf", "SV4": "some rdf"}, + ), + Scenario( + name="invalid_with_default_map", + raws=[ + ["SV1", "S1", "2024-10-16"], + ["SV2", "S1", ""], + ["SV3", "S1", "2024-10-16T15:15:15+0800"], + ["SV4", "S1", ""], + ], + default_map={"SV2": "some rdf"}, + expected_error_codes={"row-constraint"}, + ), + ] + + @pytest.mark.parametrize( + argnames="scenario", + argvalues=scenarios, + ids=[scenario.name for scenario in scenarios], + ) + def test_apply_validation(self, scenario: Scenario, mocker: pytest_mock.MockerFixture, mapper: Mapper) -> None: + """Tests the `apply_validation` method with a supplied default map. + + Args: + scenario (Scenario): The parameters of the scenario under test. + mocker (pytest_mock.MockerFixture): The mocker fixture. + mapper (Mapper): Mapper instance fixture. 
+ """ + # Construct fake data + rawh = [ + "siteVisitID", + "siteID", + "eventDateStart", + ] + all_raw = [{hname: val for hname, val in zip(rawh, ln, strict=True)} for ln in scenario.raws] + + # Modify schema to only fields required for test + descriptor = {"fields": [field for field in Mapper.schema()["fields"] if field["name"] in rawh]} + descriptor["fields"].sort(key=lambda f: rawh.index(f["name"])) + + # Patch the schema for the test + mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor + + # Create raw data csv string + output = io.StringIO() + csv_writer = csv.DictWriter(output, fieldnames=rawh) + csv_writer.writeheader() + for row in all_raw: + csv_writer.writerow(row) + csv_data = output.getvalue().encode("utf-8") + + # Apply validation + report = mapper.apply_validation( + data=csv_data, + site_visit_id_temporal_map=scenario.default_map, + ) + + # Assert + assert report.valid == (scenario.expected_error_codes == set()) + if not report.valid: + error_codes = [code for codes in report.flatten(["type"]) for code in codes] + assert set(error_codes) == scenario.expected_error_codes + + def test_apply_mapping(self, mapper: Mapper) -> None: + """Tests the `apply_mapping` method with supplied default map. + + Args: + mapper (Mapper): Mapper instance fixture. + """ + # Build a dataframe from an existing csv + df: pd.DataFrame = pd.read_csv("abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.csv") + + # Set first row site_visit_id to test value and nullify event date + df["siteVisitID"] = df["siteVisitID"].astype(str) + df.loc[0, "siteVisitID"] = "SV1" + df.loc[0, "eventDateStart"] = pd.NA + + # Create a default temporal map. 
+ temp_g = rdflib.Graph() + top_node = rdflib.BNode() + ftn = rdflib.URIRef("http://example.com/FakeTestNode") + temp_g.add((top_node, a, rdflib.TIME.TemporalEntity)) + temp_g.add((top_node, a, ftn)) + site_visit_id_temporal_map = {"SV1": temp_g.serialize()} + + # Invoke + graphs = mapper.apply_mapping( + data=df.to_csv(index=False).encode("utf-8"), + chunk_size=None, + dataset_iri=tests.helpers.TEST_DATASET_IRI, + base_iri=tests.helpers.TEST_BASE_NAMESPACE, + site_visit_id_temporal_map=site_visit_id_temporal_map, + ) + res_g = next(graphs) + # Ensure temporal entity added to graph + assert next(res_g.subjects(a, ftn)) is not None + + +class TestSiteVisitIDSiteIDMap: + """Tests specific to the provision of a site visit id -> site id map.""" + + @attrs.define(kw_only=True) + class Scenario: + """Dataclass to hold the scenario parameters.""" + + name: str + raws: list[list[str]] + expected_error_codes: set[str] = set() + lookup_map: dict[str, str] + + scenarios: list[Scenario] = [ + Scenario( + name="valid_with_default_map", + raws=[ + ["SV1", "S1"], + ["SV2", "S1"], + ["SV3", "S1"], + ["SV4", "S1"], + ["", "S1"], + ], + lookup_map={"SV1": "S1", "SV2": "S1", "SV3": "S1", "SV4": "S1"}, + ), + Scenario( + name="invalid_with_default_map", + raws=[ + ["SV1", "S1"], + ["SV2", "S1"], + ["SV3", "S1"], + ["SV4", "S1"], + ], + lookup_map={"SV2": "S2"}, + expected_error_codes={"row-constraint"}, + ), + ] + + @pytest.mark.parametrize( + argnames="scenario", + argvalues=scenarios, + ids=[scenario.name for scenario in scenarios], + ) + def test_apply_validation(self, scenario: Scenario, mocker: pytest_mock.MockerFixture, mapper: Mapper) -> None: + """Tests the `apply_validation` method with a supplied default map. + + Args: + scenario (Scenario): The parameters of the scenario under test. + mocker (pytest_mock.MockerFixture): The mocker fixture. + mapper (Mapper): Mapper instance fixture. 
+ """ + # Construct fake data + rawh = [ + "siteVisitID", + "siteID", + ] + all_raw = [{hname: val for hname, val in zip(rawh, ln, strict=True)} for ln in scenario.raws] + + # Modify schema to only fields required for test + descriptor = {"fields": [field for field in Mapper.schema()["fields"] if field["name"] in rawh]} + descriptor["fields"].sort(key=lambda f: rawh.index(f["name"])) + + # Patch the schema for the test + mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor + + # Create raw data csv string + output = io.StringIO() + csv_writer = csv.DictWriter(output, fieldnames=rawh) + csv_writer.writeheader() + for row in all_raw: + csv_writer.writerow(row) + csv_data = output.getvalue().encode("utf-8") + + # Apply validation + report = mapper.apply_validation( + data=csv_data, + site_visit_id_site_id_map=scenario.lookup_map, + ) + + # Assert + assert report.valid == (scenario.expected_error_codes == set()) + if not report.valid: + error_codes = [code for codes in report.flatten(["type"]) for code in codes] + assert set(error_codes) == scenario.expected_error_codes + + +def test_extract_site_id_keys( + mocker: pytest_mock.MockerFixture, + mapper: Mapper, +) -> None: + """Test the extract_site_id_keys method. + + Args: + mocker (pytest_mock.MockerFixture): The mocker fixture. + """ + # Construct a raw data set only using fields relevant to method. 
+ rawh = ["siteID"] + raws = [["site1"], [""], ["site2"], ["site3"], ["site3"]] + + # Amalgamate into a list of dicts + all_raw = [{hname: val for hname, val in zip(rawh, ln, strict=True)} for ln in raws] + + # Modify schema to only include the necessary fields + descriptor = {"fields": [{"name": "siteID", "type": "string"}]} + mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor + + # Create raw data csv string + output = io.StringIO() + csv_writer = csv.DictWriter(output, fieldnames=rawh) + csv_writer.writeheader() + for row in all_raw: + csv_writer.writerow(row) + csv_data = output.getvalue().encode("utf-8") + + expected = { + "site1": True, + "site2": True, + "site3": True, + } + + # Invoke method + actual = mapper.extract_site_id_keys(csv_data) + + # Validate + assert actual == expected diff --git a/tests/templates/test_survey_site_data_v3.py b/tests/templates/test_survey_site_data_v3.py new file mode 100644 index 00000000..402950d9 --- /dev/null +++ b/tests/templates/test_survey_site_data_v3.py @@ -0,0 +1,256 @@ +"""Tests for the survey_site_data template not common to other templates.""" + +# Standard +import csv +import io + +# Third-party +import attrs +import frictionless +import pytest +import pytest_mock +import rdflib + +# Local +from abis_mapping import base +import abis_mapping.templates.survey_site_data_v3.mapping + +# Typing +from typing import Any + + +def test_extract_geometry_defaults( + mocker: pytest_mock.MockerFixture, +) -> None: + """Test the extract_geometry_defaults method. + + Args: + mocker: The mocker fixture. + """ + # Construct a dummy raw data set using only the fields that matter to the method. 
+ rawh = ["siteID", "footprintWKT", "decimalLongitude", "decimalLatitude", "geodeticDatum"] + raws = [ + ["site1", "POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))", "", "", "WGS84"], + ["site2", "POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))", "10.0", "20.0", "WGS84"], + ["site3", "", "10.0", "20.0", "WGS84"], + ["site4", "", "", "", ""], + ["site5", "", "10.0", "20.0", ""], + ["site6", "POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))", "", "", ""], + ["site7", "", "10.0", "20.0", "AGD66"], + ["site8", "", "11.0", "21.0", "EPSG:4202"], + ["site9", "", "12.0", "22.0", "GRS20"], + # rows with missing siteID should not be included in map + ["", "POLYGON((0 0, 0 5, 5 5, 5 0, 0 0))", "", "", "WGS84"], + ["", "", "10.0", "20.0", "WGS84"], + ] + # Amalgamate into a list of dicts + all_raw = [{hname: val for hname, val in zip(rawh, ln, strict=True)} for ln in raws] + + # Get the specific mapper + mapper = abis_mapping.templates.survey_site_data_v3.mapping.SurveySiteMapper() + + # Modify schema to only include the necessary fields for test + descriptor = { + "fields": [ + {"name": "siteID", "type": "string"}, + {"name": "footprintWKT", "type": "wkt"}, + {"name": "decimalLongitude", "type": "number"}, + {"name": "decimalLatitude", "type": "number"}, + {"name": "geodeticDatum", "type": "string"}, + ] + } + mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor + + # Create raw data csv string + output = io.StringIO() + csv_writer = csv.DictWriter(output, fieldnames=rawh) + csv_writer.writeheader() + for row in all_raw: + csv_writer.writerow(row) + csv_data = output.getvalue().encode("utf-8") + + expected = { + "site1": " POINT (2.5 2.5)", + "site2": " POINT (2.5 2.5)", + "site3": " POINT (20 10)", + "site7": " POINT (20 10)", + "site8": " POINT (21 11)", + } + # Invoke method + assert hasattr(mapper, "extract_geometry_defaults") + actual = mapper.extract_geometry_defaults(csv_data) + + # Validate + assert actual == expected + + +class TestSiteIDForeignKeys: + @attrs.define(kw_only=True) 
+ class Scenario: + """Dataclass to hold the scenario parameters.""" + + name: str + raws: list[list[str]] + site_id_map: dict[str, bool] + expected_error_codes: set[str] = set() + + scenarios: list[Scenario] = [ + Scenario( + name="valid_with_site_id_map", + raws=[ + ["site1", "-38.94", "115.21", "POINT(30 10)", "WGS84"], + ["site2", "-38.94", "115.21", "", "GDA2020"], + ["site3", "", "", "LINESTRING(30 10, 10 30, 40 40)", "GDA94"], + ["site4", "", "", "", ""], + # Following shows that non-url safe siteIDs get encoded when mapping. + ["site a", "-38.94", "115.21", "", "GDA2020"], + ["site/b", "-38.94", "115.21", "", "GDA2020"], + [r"site\c", "-38.94", "115.21", "", "GDA2020"], + ["site\nd", "-38.94", "115.21", "", "GDA2020"], + ], + site_id_map={ + "site4": True, + "siteNone": True, + }, + ), + Scenario( + name="invalid_missing_geometry_and_not_in_map", + raws=[ + ["site1", "", "", "", ""], + ], + site_id_map={"site2": True}, + expected_error_codes={"row-constraint"}, + ), + ] + + @pytest.mark.parametrize( + argnames="scenario", + argvalues=[scenario for scenario in scenarios], + ids=[scenario.name for scenario in scenarios], + ) + def test_apply_validation(self, scenario: Scenario, mocker: pytest_mock.MockerFixture) -> None: + """Tests the apply_validation method with siteID foreign key dictionary provided. + + Args: + scenario (Scenario): The parameters of the scenario under test. + mocker (pytest_mock.MockerFixture): The mocker fixture. 
+ """ + # Construct fake data + rawh = ["siteID", "decimalLatitude", "decimalLongitude", "footprintWKT", "geodeticDatum"] + all_raw = [{hname: val for hname, val in zip(rawh, ln, strict=True)} for ln in scenario.raws] + + # Get mapper + mapper = abis_mapping.templates.survey_site_data_v3.mapping.SurveySiteMapper() + + # Modify schema to only fields required for test + descriptor = {"fields": [field for field in mapper.schema()["fields"] if field["name"] in rawh]} + descriptor["fields"].sort(key=lambda f: rawh.index(f["name"])) + + # Patch the schema for the test + mocker.patch.object(base.mapper.ABISMapper, "schema").return_value = descriptor + + # Create raw data csv string + output = io.StringIO() + csv_writer = csv.DictWriter(output, fieldnames=rawh) + csv_writer.writeheader() + for row in all_raw: + csv_writer.writerow(row) + csv_data = output.getvalue().encode("utf-8") + + # Apply validation + report = mapper.apply_validation( + data=csv_data, + site_id_map=scenario.site_id_map, + ) + + # Assert + assert report.valid == (scenario.expected_error_codes == set()) + if not report.valid: + error_codes = [code for codes in report.flatten(["type"]) for code in codes] + assert set(error_codes) == scenario.expected_error_codes + + +@pytest.mark.parametrize( + "row_dict", + [ + {"footprintWKT": None, "geodeticDatum": None}, + {"footprintWKT": None, "geodeticDatum": "WGS84"}, + {"footprintWKT": "POINT (0 0)", "geodeticDatum": None}, + ], +) +def test_add_footprint_geometry_no_geometry(row_dict: dict[str, Any]) -> None: + """Tests that add_footprint_geometry won't add to graph without valid WKT geometry. + + Args: + row_dict (dict[str, Any]): Raw data row to use for test case. 
+ """ + # Create graph + graph = rdflib.Graph() + + # Create resource + resource = frictionless.Resource(source=[row_dict]) + + # Extract row + with resource.open() as r: + row = next(r.row_stream) + + # Create URI + uri = rdflib.URIRef("http://example.com/abis-mapping/test") + + # Get mapper + mapper = abis_mapping.templates.survey_site_data_v3.mapping.SurveySiteMapper() + + # Call method + mapper.add_footprint_geometry( + uri=uri, + row=row, + graph=graph, + ) + + # Validate no triples added to graph + assert len(graph) == 0 + + +@pytest.mark.parametrize( + "row_dict", + [ + {"decimalLatitude": None, "decimalLongitude": None, "geodeticDatum": None}, + {"decimalLatitude": None, "decimalLongitude": None, "geodeticDatum": "WGS84"}, + {"decimalLatitude": None, "decimalLongitude": 0, "geodeticDatum": None}, + {"decimalLatitude": None, "decimalLongitude": 0, "geodeticDatum": "WGS84"}, + {"decimalLatitude": 0, "decimalLongitude": None, "geodeticDatum": None}, + {"decimalLatitude": 0, "decimalLongitude": None, "geodeticDatum": "WGS84"}, + {"decimalLatitude": 0, "decimalLongitude": 0, "geodeticDatum": None}, + ], +) +def test_add_point_geometry_no_geometry(row_dict: dict[str, Any]) -> None: + """Tests that add_point_geometry method doesn't add to graph for no point geometries. + + Args: + row_dict (dict[str, Any]): Raw data row to use for test case. 
+ """ + # Create graph + graph = rdflib.Graph() + + # Create resource + resource = frictionless.Resource(source=[row_dict]) + + # Extract row + with resource.open() as r: + row = next(r.row_stream) + + # Create URI + uri = rdflib.URIRef("http://example.com/abis-mapping/test") + + # Get mapper + mapper = abis_mapping.templates.survey_site_data_v3.mapping.SurveySiteMapper() + + # Call method + mapper.add_point_geometry( + uri=uri, + row=row, + graph=graph, + ) + + # Validate no triples added to graph + assert len(graph) == 0 diff --git a/tests/templates/test_survey_site_visit_data_v3.py b/tests/templates/test_survey_site_visit_data_v3.py new file mode 100644 index 00000000..2c5c7b58 --- /dev/null +++ b/tests/templates/test_survey_site_visit_data_v3.py @@ -0,0 +1,273 @@ +"""Tests for the survey_site_visit_data v3 template not common to other templates.""" + +# Standard +import csv +import dataclasses +import io +import pathlib + +# Third-party +import pyshacl +import pytest +import pytest_mock +import rdflib + +# Local +from abis_mapping import models +from abis_mapping.templates.survey_site_visit_data_v3 import mapping + +# Typing +from typing import Iterator + + +@pytest.fixture +def mapper() -> Iterator[mapping.SurveySiteVisitMapper]: + """Provides site visit mapper for tests. + + Yields: + SurveySiteVisitMapper: site visit mapper instance. 
+ """ + # Create mapper + mapper = mapping.SurveySiteVisitMapper() + + # Clear schema cache + mapper.schema.cache_clear() + + # Yield mapper + yield mapper + + # Clear schema cache again + mapper.schema.cache_clear() + + +@dataclasses.dataclass +class Scenario: + name: str + start_date: str + end_date: str | None = None + + +scenarios = [ + Scenario( + name="start_date_only", + start_date="2024-10-11", + ), + Scenario( + name="both_dates", + start_date="2024-10-11", + end_date="2025-10-11", + ), +] + + +@pytest.mark.parametrize( + argnames="scenario", + argvalues=scenarios, + ids=[s.name for s in scenarios], +) +def test_add_temporal_coverage_node(scenario: Scenario, mapper: mapping.SurveySiteVisitMapper) -> None: + """Tests the graph output from add_temporal_coverage_node method. + + Args: + scenario (Scenario): Data structure containing test parameters. + mapper (SurveySiteVisitMapper): Site visit mapper instance fixture. + """ + # Parse dates + start_date = models.temporal.parse_timestamp(scenario.start_date) + end_date = None if scenario.end_date is None else models.temporal.parse_timestamp(scenario.end_date) + + # Create graph + graph = rdflib.Graph() + + # Invoke + mapper.add_temporal_coverage_bnode( + graph=graph, + start_date=start_date, + end_date=end_date, + ) + + # Perform validation on shapes + shape_file = pathlib.Path("abis_mapping/base/validators/shapes.ttl") + shape_graph = rdflib.Graph().parse(data=shape_file.read_bytes()) + valid, _, report = pyshacl.validate(data_graph=graph, shacl_graph=shape_graph) + + # If not valid raise assertion error with report output + assert valid, report + + +class TestMapExtractors: + """Tests for the key value extraction methods.""" + + def test_extract_temporal_defaults( + self, + mapper: mapping.SurveySiteVisitMapper, + mocker: pytest_mock.MockerFixture, + ) -> None: + """Tests the extract_temporal_defaults method. + + Args: + mapper (SurveySiteVisitMapper): Site visit mapper instance fixture. 
+ mocker (pytest_mock.MockerFixture): The mocker fixture + """ + # Retrieve actual descriptor + original_descriptor = mapping.SurveySiteVisitMapper.schema() + + # Define fields of relevance for tests + fieldnames = ["surveyID", "siteVisitID", "siteVisitStart", "siteVisitEnd"] + + # Make descriptor only include these fields + descriptor = { + **original_descriptor, + "fields": [f for f in original_descriptor["fields"] if f["name"] in fieldnames], + } + + # Patch schema + mocked_schema = mocker.patch.object(mapping.SurveySiteVisitMapper, "schema", return_value=descriptor) + + # Declare some raw data + expected_rows = [ + { + "surveyID": "A", + "siteVisitID": "SV1", + "siteVisitStart": "2024-10-14", + "siteVisitEnd": "2025-10-14", + }, + { + "surveyID": "A", + "siteVisitID": "SV2", + "siteVisitStart": "2024-10-14", + }, + ] + excluded_rows = [ + # The map should exclude these since there are no + # values for default temporal entity must have start date + { + "surveyID": "A", + "siteVisitID": "SV3", + "siteVisitEnd": "2025-10-14", + }, + { + "surveyID": "A", + "siteVisitID": "SV4", + }, + # map should exclude these because there is no siteVisitID + { + "surveyID": "A", + "siteVisitID": "", + "siteVisitStart": "2024-10-14", + "siteVisitEnd": "2025-10-14", + }, + { + "surveyID": "A", + "siteVisitID": "", + "siteVisitStart": "2024-10-14", + }, + ] + # Build elements for expected map + graphs = [rdflib.Graph() for _ in range(2)] + for g, r in zip(graphs, expected_rows, strict=True): + raw_start: str = r["siteVisitStart"] + raw_end: str | None = r.get("siteVisitEnd") + start = models.temporal.parse_timestamp(raw_start) + end = models.temporal.parse_timestamp(raw_end) if raw_end is not None else None + mapper.add_temporal_coverage_bnode(graph=g, start_date=start, end_date=end) + + # Construct expected map + expected = {r["siteVisitID"]: g.serialize(format="turtle") for g, r in zip(graphs, expected_rows, strict=True)} + + # Create raw data csv string + output = io.StringIO() 
+ csv_writer = csv.DictWriter(output, fieldnames=expected_rows[0].keys()) + csv_writer.writeheader() + for row in expected_rows + excluded_rows: + csv_writer.writerow(row) + csv_data = output.getvalue().encode("utf-8") + + # Invoke + actual = mapper.extract_temporal_defaults(csv_data) + + # Assert + assert actual == expected + mocked_schema.assert_called_once() + + def test_extract_site_visit_id_to_site_id_map( + self, + mapper: mapping.SurveySiteVisitMapper, + mocker: pytest_mock.MockerFixture, + ) -> None: + """Tests the extract_site_visit_id_to_site_id_map method. + + Args: + mapper: Mapper instance fixture. + mocker: The mocker fixture. + """ + # Retrieve actual descriptor + original_descriptor = mapping.SurveySiteVisitMapper.schema() + + # Define fields of relevance for tests + fieldnames = ["surveyID", "siteID", "siteVisitID"] + + # Make descriptor only include these fields + descriptor = { + **original_descriptor, + "fields": [f for f in original_descriptor["fields"] if f["name"] in fieldnames], + } + + # Patch schema + mocked_schema = mocker.patch.object(mapping.SurveySiteVisitMapper, "schema", return_value=descriptor) + + # Declare some raw data + expected_rows: list[dict[str, str | None]] = [ + { + "surveyID": "A", + "siteID": "S1", + "siteVisitID": "SV1", + }, + { + "surveyID": "A", + "siteID": "S1", + "siteVisitID": "SV2", + }, + ] + excluded_rows: list[dict[str, str | None]] = [ + # The map should exclude these since there are no + # values for siteID + { + "surveyID": "A", + "siteID": "", + "siteVisitID": "SV3", + }, + { + "surveyID": "A", + "siteID": None, + "siteVisitID": "SV4", + }, + # map should exclude these because there is no siteVisitID + { + "surveyID": "A", + "siteID": "S2", + "siteVisitID": "", + }, + { + "surveyID": "A", + "siteID": "S3", + "siteVisitID": None, + }, + ] + # Construct expected map + expected = {r["siteVisitID"]: r["siteID"] for r in expected_rows} + + # Create raw data csv string + output = io.StringIO() + csv_writer = 
csv.DictWriter(output, fieldnames=expected_rows[0].keys()) + csv_writer.writeheader() + for row in expected_rows + excluded_rows: + csv_writer.writerow(row) + csv_data = output.getvalue().encode("utf-8") + + # Invoke + actual = mapper.extract_site_visit_id_to_site_id_map(csv_data) + + # Assert + assert actual == expected + mocked_schema.assert_called_once() From 63a5058c45b8838c450d31acdbca92e8bc8761ef Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Fri, 13 Dec 2024 13:26:23 +0800 Subject: [PATCH 6/8] BDRSPS-1109 Made surveyID a required field in site data file, and cross validate it with metadata template --- .../survey_site_visit_data_v3/mapping.py | 12 +++++- .../survey_site_visit_data_v3/schema.json | 13 ++++++- .../templates/instructions.md | 7 +++- .../test_survey_site_visit_data_v3.py | 37 +++++++++++++++++++ 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/abis_mapping/templates/survey_site_visit_data_v3/mapping.py b/abis_mapping/templates/survey_site_visit_data_v3/mapping.py index 8a074677..de4d25e1 100644 --- a/abis_mapping/templates/survey_site_visit_data_v3/mapping.py +++ b/abis_mapping/templates/survey_site_visit_data_v3/mapping.py @@ -48,6 +48,9 @@ def apply_validation( data (base.types.ReadableType): Raw data to be validated. **kwargs (Any): Additional keyword arguments. + Keyword Args: + survey_id_set (Set[str]): Set of surveyIDs from the metadata template. + Returns: frictionless.Report: Validation report for the specified data. 
""" @@ -74,6 +77,13 @@ def apply_validation( ), ] + if "survey_id_set" in kwargs: + checks.append( + plugins.survey_id_validation.SurveyIDValidation( + valid_survey_ids=kwargs["survey_id_set"], + ) + ) + # Validate the site visit resource report: frictionless.Report = resource_site_visit_data.validate( checklist=frictionless.Checklist( @@ -240,7 +250,7 @@ def apply_mapping_row( uri_site = utils.iri_patterns.site_iri(base_iri, row_site_id) # Create TERN survey IRI from surveyID field - row_survey_id: str | None = row["surveyID"] + row_survey_id: str = row["surveyID"] uri_survey = utils.iri_patterns.survey_iri(base_iri, row_survey_id) # URI for the Site Visit Plan diff --git a/abis_mapping/templates/survey_site_visit_data_v3/schema.json b/abis_mapping/templates/survey_site_visit_data_v3/schema.json index c22f20f7..701d8dbf 100644 --- a/abis_mapping/templates/survey_site_visit_data_v3/schema.json +++ b/abis_mapping/templates/survey_site_visit_data_v3/schema.json @@ -3,12 +3,12 @@ { "name": "surveyID", "title": "SurveyID", - "description": "The identifier of the Survey that the Site is related to in this dataset.", + "description": "The identifier of the Survey that the Site Visit is related to in this dataset.", "example": "AR220-01", "type": "string", "format": "default", "constraints": { - "required": false + "required": true } }, { @@ -164,5 +164,14 @@ "SAMPLING_EFFORT_UNIT" ] } + ], + "foreignKeys": [ + { + "fields": "surveyID", + "reference": { + "resource": "survey_metadata", + "fields": "surveyID" + } + } ] } diff --git a/abis_mapping/templates/survey_site_visit_data_v3/templates/instructions.md b/abis_mapping/templates/survey_site_visit_data_v3/templates/instructions.md index 84da4016..598815e2 100644 --- a/abis_mapping/templates/survey_site_visit_data_v3/templates/instructions.md +++ b/abis_mapping/templates/survey_site_visit_data_v3/templates/instructions.md @@ -89,7 +89,12 @@ For example, `instrumentType`, `instrumentIdentifier`, `weatherConditions`. 
## CHANGELOG -No changes from Systematic Survey Site Visit Data Template v2.0.0 +Changes from Systematic Survey Site Visit Data Template v2.0.0 + +### CHANGED FIELDS + +* [`surveyID`](#surveyID-field) is now a **mandatory** field, and every row must have a value that matches a `surveyID` + in the Systematic Survey Metadata template to indicate which Survey the Site Visit is related to. ## APPENDICES ### APPENDIX-I: Vocabulary List diff --git a/tests/templates/test_survey_site_visit_data_v3.py b/tests/templates/test_survey_site_visit_data_v3.py index 2c5c7b58..d7003d59 100644 --- a/tests/templates/test_survey_site_visit_data_v3.py +++ b/tests/templates/test_survey_site_visit_data_v3.py @@ -271,3 +271,40 @@ def test_extract_site_visit_id_to_site_id_map( # Assert assert actual == expected mocked_schema.assert_called_once() + + +def test_validation_with_survey_id_set_valid( + mapper: mapping.SurveySiteVisitMapper, +) -> None: + """Test surveyID cross-validation when the file is valid.""" + example_file = pathlib.Path("abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv") + + with example_file.open("rb") as data: + report = mapper.apply_validation( + data, + # provide surveyIDs in the file, to make it valid + survey_id_set={"TIS-24-03": True}, + ) + + assert report.valid + + +def test_validation_with_survey_id_set_invalid( + mapper: mapping.SurveySiteVisitMapper, +) -> None: + """Test surveyID cross-validation when the file is invalid.""" + example_file = pathlib.Path("abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv") + + with example_file.open("rb") as data: + report = mapper.apply_validation( + data, + # Don't provide surveyIDs in the file, to make it invalid + survey_id_set={"SOME_OTHER_ID": True}, + ) + + assert not report.valid + assert len(report.tasks) == 1 + assert len(report.tasks[0].errors) == 3 + assert report.tasks[0].errors[0].note == "surveyID must match a surveyID in the survey_metadata template" + assert
report.tasks[0].errors[1].note == "surveyID must match a surveyID in the survey_metadata template" + assert report.tasks[0].errors[2].note == "surveyID must match a surveyID in the survey_metadata template" From 456aa79454c3c6da1f56d05b6b70a2ca6e3ac540 Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Fri, 13 Dec 2024 14:40:55 +0800 Subject: [PATCH 7/8] BDRSPS-1109 Cross validate surveyID in occurrence template with metadata template only when it is provided --- .../survey_occurrence_data_v3/mapping.py | 9 +++++ .../survey_occurrence_data_v3/schema.json | 7 ++++ .../templates/instructions.md | 7 +++- .../test_survey_occurrence_data_v3.py | 40 +++++++++++++++++++ 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/abis_mapping/templates/survey_occurrence_data_v3/mapping.py b/abis_mapping/templates/survey_occurrence_data_v3/mapping.py index deb72a03..c6930aeb 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/mapping.py +++ b/abis_mapping/templates/survey_occurrence_data_v3/mapping.py @@ -80,6 +80,7 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric **kwargs (Any): Additional keyword arguments. Keyword Args: + survey_id_set (Set[str]): Set of surveyIDs from the metadata template. site_id_geometry_map (dict[str, str]): Default values to use for geometry for given siteID. site_visit_id_temporal_map (dict[str, str]): Default RDF (serialized as turtle) @@ -90,6 +91,7 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric frictionless.Report: Validation report for the specified data. 
""" # Extract kwargs + survey_id_set = kwargs.get("survey_id_set") site_id_geometry_map = kwargs.get("site_id_geometry_map") site_visit_id_temporal_map = kwargs.get("site_visit_id_temporal_map") site_visit_id_site_id_map = kwargs.get("site_visit_id_site_id_map") @@ -133,6 +135,13 @@ def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> fric ], ) + if survey_id_set is not None: + checklist.add_check( + plugins.survey_id_validation.SurveyIDValidation( + valid_survey_ids=survey_id_set, + ) + ) + # Modify checklist in the event site visit id to site id map provided if site_visit_id_site_id_map is not None: # Add lookup match check diff --git a/abis_mapping/templates/survey_occurrence_data_v3/schema.json b/abis_mapping/templates/survey_occurrence_data_v3/schema.json index 97cc506a..18962000 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/schema.json +++ b/abis_mapping/templates/survey_occurrence_data_v3/schema.json @@ -752,6 +752,13 @@ } ], "foreignKeys": [ + { + "fields": "surveyID", + "reference": { + "resource": "survey_metadata", + "fields": "surveyID" + } + }, { "fields": "siteID", "reference": { diff --git a/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md b/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md index dc9a2751..39257d9e 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md +++ b/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md @@ -97,7 +97,12 @@ datatype format, and examples. ## CHANGELOG -No changes from Systematic Survey Occurrence Data Template v2.0.0 +Changes from Systematic Survey Occurrence Data Template v2.0.0 + +### CHANGED FIELDS + +* When [`surveyID`](#surveyID-field) is provided, it must have a value that matches a `surveyID` + in the Systematic Survey Metadata template to indicate which Survey the Occurrence belongs to. 
## APPENDICES ### APPENDIX-I: Vocabulary List diff --git a/tests/templates/test_survey_occurrence_data_v3.py b/tests/templates/test_survey_occurrence_data_v3.py index ef0fca6b..d56350f9 100644 --- a/tests/templates/test_survey_occurrence_data_v3.py +++ b/tests/templates/test_survey_occurrence_data_v3.py @@ -563,3 +563,43 @@ def test_extract_site_id_keys( # Validate assert actual == expected + + +def test_validation_with_survey_id_set_valid( + mapper: Mapper, +) -> None: + """Test surveyID cross-validation when the file is valid.""" + example_file = pathlib.Path( + "abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv" + ) + + with example_file.open("rb") as data: + report = mapper.apply_validation( + data, + # provide surveyIDs in the file, to make it valid + survey_id_set={"MR-R1": True}, + ) + + assert report.valid + + +def test_validation_with_survey_id_set_invalid( + mapper: Mapper, +) -> None: + """Test surveyID cross-validation when the file is invalid.""" + example_file = pathlib.Path( + "abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv" + ) + + with example_file.open("rb") as data: + report = mapper.apply_validation( + data, + # Don't provide surveyIDs in the file, to make it invalid + survey_id_set={"SOME_OTHER_ID": True}, + ) + + assert not report.valid + assert len(report.tasks) == 1 + assert len(report.tasks[0].errors) == 2 + assert report.tasks[0].errors[0].note == "surveyID must match a surveyID in the survey_metadata template" + assert report.tasks[0].errors[1].note == "surveyID must match a surveyID in the survey_metadata template" From b7df1799a984a540831c9b8fa91c085c62d17559 Mon Sep 17 00:00:00 2001 From: Lincoln Puzey Date: Fri, 13 Dec 2024 14:41:45 +0800 Subject: [PATCH 8/8] BDRSPS-1109 When surveyID is missing from an occurrence, treat it as incidental i.e. don't link it to a Survey in the RDF output. 
--- .../margaret_river_flora.ttl | 45 ++++++------------- .../examples/organism_qty.ttl | 6 +-- .../survey_occurrence_data_v3/mapping.py | 19 +++++--- .../survey_occurrence_data_v3/schema.json | 2 +- .../templates/instructions.md | 1 + .../validators/validator.ttl | 7 +-- 6 files changed, 34 insertions(+), 46 deletions(-) diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl index 7ff9007b..301a611b 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl @@ -1464,8 +1464,7 @@ a dwc:Occurrence, tern:FeatureOfInterest ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000021 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1488,8 +1487,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000000 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1501,8 +1499,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc57550000001b ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1514,8 +1511,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc57550000001e ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1527,8 +1523,7 @@ tern:FeatureOfInterest ; sosa:usedProcedure ; schema:identifier "PE:12:8831" ; - schema:isPartOf , - ; + 
schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000003 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1540,8 +1535,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000006 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1553,8 +1547,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000009 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1566,8 +1559,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc57550000000c ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1579,8 +1571,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc57550000000f ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1592,8 +1583,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000012 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1605,8 +1595,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000015 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1618,8 +1607,7 @@ tern:FeatureOfInterest ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000018 ; schema:temporal [ a time:TemporalEntity ; 
time:hasBeginning [ a time:Instant ; @@ -1650,8 +1638,7 @@ dwc:collectionCode "C01" ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000024 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1665,8 +1652,7 @@ dwc:collectionCode "C01" ; prov:wasAssociatedWith ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:N7bfc9936b099cf9353fc575500000027 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -1739,9 +1725,6 @@ tern:featureType ; tern:locationDescription "Cowaramup Bay Road" . - a tern:Survey ; - schema:isPartOf . - a prov:Agent ; schema:name "Stream Environment and Water Pty Ltd" . diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl b/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl index 986d700c..ffa0115d 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl @@ -51,9 +51,6 @@ schema:isPartOf ; tern:featureType . - a tern:Survey ; - schema:isPartOf . - a prov:Attribution ; prov:agent ; prov:hadRole . @@ -79,8 +76,7 @@ tern:FeatureOfInterest ; sosa:isSampleOf ; sosa:usedProcedure ; - schema:isPartOf , - ; + schema:isPartOf ; schema:spatial _:Nb0c3d4fa822b88b4d3f8743700000000 ; schema:temporal [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; diff --git a/abis_mapping/templates/survey_occurrence_data_v3/mapping.py b/abis_mapping/templates/survey_occurrence_data_v3/mapping.py index c6930aeb..d5e64aa1 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/mapping.py +++ b/abis_mapping/templates/survey_occurrence_data_v3/mapping.py @@ -544,9 +544,12 @@ def apply_mapping_row( # Create URIs for Survey-related fields (i.e. 
fields not on the incidental template) - # Create TERN survey IRI from surveyID field + # Create TERN survey IRI from surveyID field, only when it is provided survey_id: str | None = row["surveyID"] - survey = utils.iri_patterns.survey_iri(base_iri, survey_id) + if survey_id: + survey = utils.iri_patterns.survey_iri(base_iri, survey_id) + else: + survey = None # Create Tern Site IRI, depending on the siteID field site_id: str | None = row["siteID"] @@ -4244,7 +4247,7 @@ def add_sensitivity_category_collection( def add_survey( self, - uri: rdflib.URIRef, + uri: rdflib.URIRef | None, dataset: rdflib.URIRef, graph: rdflib.Graph, ) -> None: @@ -4257,6 +4260,9 @@ def add_survey( dataset: The dataset URI graph: The graph to update """ + if uri is None: + return + # Add type graph.add((uri, a, utils.namespaces.TERN.Survey)) # Add dataset link @@ -4302,7 +4308,7 @@ def add_occurrence( other_catalog_numbers_datatype: rdflib.URIRef | None, catalog_number_datatype: rdflib.URIRef | None, provider_recorded_by: rdflib.URIRef | None, - survey: rdflib.URIRef, + survey: rdflib.URIRef | None, site: rdflib.URIRef | None, site_visit: rdflib.URIRef | None, dataset: rdflib.URIRef, @@ -4457,8 +4463,9 @@ def add_occurrence( # Add to Graph graph.add((uri, utils.namespaces.DWC.collectionCode, rdflib.Literal(row["collectionCode"]))) - # Add survey - graph.add((uri, rdflib.SDO.isPartOf, survey)) + # Add survey, if provided + if survey: + graph.add((uri, rdflib.SDO.isPartOf, survey)) # Add site if provided if site is not None: diff --git a/abis_mapping/templates/survey_occurrence_data_v3/schema.json b/abis_mapping/templates/survey_occurrence_data_v3/schema.json index 18962000..72262a92 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/schema.json +++ b/abis_mapping/templates/survey_occurrence_data_v3/schema.json @@ -720,7 +720,7 @@ { "name": "surveyID", "title": "Survey ID", - "description": "The identifier of the Survey that the occurrence comes from. 
This field should be completed if it is ambiguous as to which survey the occurrence belongs to.", + "description": "The identifier of the Survey that the Occurrence comes from. If this field is left blank, the Occurrence will be treated as incidental.", "example": "AR220-01", "type": "string", "format": "default", diff --git a/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md b/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md index 39257d9e..7c1bae21 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md +++ b/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md @@ -103,6 +103,7 @@ Changes from Systematic Survey Occurrence Data Template v2.0.0 * When [`surveyID`](#surveyID-field) is provided, it must have a value that matches a `surveyID` in the Systematic Survey Metadata template to indicate which Survey the Occurrence belongs to. + When [`surveyID`](#surveyID-field) is blank, the Occurrence will be treated as incidental. ## APPENDICES ### APPENDIX-I: Vocabulary List diff --git a/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl b/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl index 58e12772..d5f44661 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl @@ -236,11 +236,12 @@ bdrsh:SurveyOccurrenceShape sh:maxCount 1 ; sh:class prov:Agent ; ] ; - # dwc:Occurrence has 2 schema:isPartOf triples, one for the tern:Survey and one for the tern:Dataset: + # dwc:Occurrence always has 1 schema:isPartOf triple for the tern:Dataset, + # and optionally a second one for the tern:Survey. 
sh:property [ sh:path schema:isPartOf ; sh:nodeKind sh:IRI ; - sh:minCount 2 ; + sh:minCount 1 ; sh:maxCount 2 ; ] ; sh:property [ @@ -249,7 +250,7 @@ bdrsh:SurveyOccurrenceShape sh:nodeKind sh:IRI ; sh:class tern:Survey ; ] ; - sh:qualifiedMinCount 1 ; + sh:qualifiedMinCount 0 ; sh:qualifiedMaxCount 1 ; ] ; sh:property [