diff --git a/CHANGELOG.md b/CHANGELOG.md index c4bb5eee..ebbe0d71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# October 2024 +# December 2024 ## Templates ### Incidental Occurrence v3.0.0 #### [Schema v3.0.0](https://github.com/gaiaresources/abis-mapping/blob/main/abis_mapping/templates/incidental_occurrence_data_v3/schema.json) changes (by column order). diff --git a/abis_mapping/templates/survey_metadata_v3/README.md b/abis_mapping/templates/survey_metadata_v3/README.md new file mode 100644 index 00000000..bce67b50 --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/README.md @@ -0,0 +1,5 @@ +# Template Description +TBC + +# Template Instructions +See `instructions.pdf` for more details diff --git a/abis_mapping/templates/survey_metadata_v3/examples/minimal.csv b/abis_mapping/templates/survey_metadata_v3/examples/minimal.csv new file mode 100644 index 00000000..a3e93b7e --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/examples/minimal.csv @@ -0,0 +1,2 @@ +surveyID,surveyName,surveyPurpose,surveyType,surveyStart,surveyEnd,targetTaxonomicScope,targetHabitatScope,spatialCoverageWKT,geodeticDatum,surveyOrgs,surveyMethodCitation,surveyMethodDescription,surveyMethodURL,keywords +COL1,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer",Summer sampling for peak insect diversity.,Wet pitfall trapping,21/01/2015,3/02/2015,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",GDA2020,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). 
https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study diff --git a/abis_mapping/templates/survey_metadata_v3/examples/minimal.ttl b/abis_mapping/templates/survey_metadata_v3/examples/minimal.ttl new file mode 100644 index 00000000..06ac4275 --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/examples/minimal.ttl @@ -0,0 +1,161 @@ +@prefix abis: . +@prefix bdr: . +@prefix geo: . +@prefix prov: . +@prefix rdf: . +@prefix rdfs: . +@prefix schema: . +@prefix skos: . +@prefix tern: . +@prefix time: . +@prefix xsd: . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Survey Collection - Survey Type - Wet pitfall trapping" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Survey Collection - Target Habitat Scope - Woodland" ; + tern:hasAttribute . 
+ + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Survey Collection - Target Taxonomic Scope - Coleoptera" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Survey Collection - Target Taxonomic Scope - Insecta" ; + tern:hasAttribute . + + a abis:Project ; + schema:hasPart ; + schema:identifier "COL1" ; + schema:isPartOf ; + schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . + + a rdfs:Datatype ; + skos:prefLabel "surveyID source" ; + prov:qualifiedAttribution [ a prov:Attribution ; + prov:agent ; + prov:hadRole ] . + + a rdfs:Datatype ; + skos:prefLabel "surveyID source" ; + prov:qualifiedAttribution [ a prov:Attribution ; + prov:agent ; + prov:hadRole ] . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Wet pitfall trapping" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Woodland" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Coleoptera" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Insecta" ; + tern:hasValue . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of targetTaxonomicScope" ; + skos:inScheme ; + skos:prefLabel "Coleoptera" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of targetTaxonomicScope" ; + skos:inScheme ; + skos:prefLabel "Insecta" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a tern:IRI, + tern:Value ; + rdfs:label "Wet pitfall trapping" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "Woodland" ; + rdf:value . 
+ + a tern:IRI, + tern:Value ; + rdfs:label "Coleoptera" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "Insecta" ; + rdf:value . + + a prov:Agent ; + schema:name "CSIRO" . + + a prov:Agent ; + schema:name "NSW Department of Planning, Industry and Environment" . + + a tern:Survey ; + bdr:purpose "Summer sampling for peak insect diversity." ; + bdr:target "Coleoptera", + "Insecta" ; + geo:hasGeometry _:N27fdbd9c9077333e51ac0d0600000000 ; + time:hasTime [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2015-01-21"^^xsd:date ] ; + time:hasEnd [ a time:Instant ; + time:inXSDDate "2015-02-03"^^xsd:date ] ] ; + prov:hadPlan ; + schema:identifier "COL1"^^, + "COL1"^^ ; + schema:keywords "farmland", + "ground beetle", + "habitat", + "morphology", + "remnant vegetation", + "split-plot study", + "traits", + "woodland" ; + schema:name "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer" . + + a prov:Plan ; + schema:citation "Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9\"" ; + schema:description "Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. 
We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location." ; + schema:isPartOf ; + schema:url "https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf"^^xsd:anyURI, + "https://doi.org/10.1002/9781118945568.ch11"^^xsd:anyURI . + + a tern:Dataset . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POLYGON ((-33.826 146.363, -33.826 148.499, -34.411 148.499, -33.826 146.363))"^^geo:wktLiteral ] ; + rdf:object _:N27fdbd9c9077333e51ac0d0600000000 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +_:N27fdbd9c9077333e51ac0d0600000000 a geo:Geometry ; + geo:asWKT " POLYGON ((-33.826 146.363, -33.826 148.499, -34.411 148.499, -33.826 146.363))"^^geo:wktLiteral . 
+ diff --git a/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_chronological_order.csv b/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_chronological_order.csv new file mode 100644 index 00000000..b4e8d7c4 --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_chronological_order.csv @@ -0,0 +1,2 @@ +surveyID,surveyName,surveyPurpose,surveyType,surveyStart,surveyEnd,targetTaxonomicScope,targetHabitatScope,spatialCoverageWKT,geodeticDatum,surveyOrgs,surveyMethodCitation,surveyMethodDescription,surveyMethodURL,keywords +COL1,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer",Summer sampling for peak insect diversity.,Wet pitfall trapping,21/01/2015,3/02/2014,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",GDA2020,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). 
Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study diff --git a/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_missing_datum.csv b/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_missing_datum.csv new file mode 100644 index 00000000..e70fd20d --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_missing_datum.csv @@ -0,0 +1,2 @@ +surveyID,surveyName,surveyPurpose,surveyType,surveyStart,surveyEnd,targetTaxonomicScope,targetHabitatScope,spatialCoverageWKT,geodeticDatum,surveyOrgs,surveyMethodCitation,surveyMethodDescription,surveyMethodURL,keywords +COL1,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer",Summer sampling for peak insect diversity.,Wet pitfall trapping,21/01/2015,3/02/2015,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). 
To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study diff --git a/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv b/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv new file mode 100644 index 00000000..6dbb5c7f --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv @@ -0,0 +1,3 @@ +surveyID,surveyName,surveyPurpose,surveyType,surveyStart,surveyEnd,targetTaxonomicScope,targetHabitatScope,spatialCoverageWKT,geodeticDatum,surveyOrgs,surveyMethodCitation,surveyMethodDescription,surveyMethodURL,keywords +COL1,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer",Summer sampling for peak insect diversity.,Wet pitfall trapping,21/01/2015,3/02/2015,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",GDA2020,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. 
Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study +COL2,"Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer",Summer sampling for peak insect diversity.,Wet pitfall trapping,21/01/2015,3/02/2015,Coleoptera | Insecta,Woodland,"POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))",GDA2020,"NSW Department of Planning, Industry and Environment | CSIRO","Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). 
https://doi.org/10.1007/s00442-018-4180-9""","Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.",https://doi.org/10.1002/9781118945568.ch11 | https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf ,ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study diff --git a/abis_mapping/templates/survey_metadata_v3/mapping.py b/abis_mapping/templates/survey_metadata_v3/mapping.py new file mode 100644 index 00000000..a9861be1 --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/mapping.py @@ -0,0 +1,903 @@ +"""Provides ABIS mapper for `survey_metadata.csv` template v3""" + +# Standard +import dataclasses + +# Third-party +import frictionless +import frictionless.checks +import rdflib + +# Local +from abis_mapping import base +from abis_mapping import plugins +from abis_mapping import models +from abis_mapping import utils + +# Typing +from typing import Any + + +# Constants / shortcuts +a = rdflib.RDF.type +PRINCIPAL_INVESTIGATOR = rdflib.URIRef("https://linked.data.gov.au/def/data-roles/principalInvestigator") +CONCEPT_SURVEY_TYPE = 
utils.rdf.uri("concept/surveyType", utils.namespaces.EXAMPLE) +CONCEPT_TARGET_HABITAT_SCOPE = rdflib.URIRef("https://linked.data.gov.au/def/nrm/ae2c88be-63d5-44d3-95ac-54b14c4a4b28") +CONCEPT_TARGET_TAXONOMIC_SCOPE = rdflib.URIRef( + "https://linked.data.gov.au/def/nrm/7ea12fed-6b87-4c20-9ab4-600b32ce15ec", +) + + +# Dataclass used in mapping +@dataclasses.dataclass +class SurveyIDDatatype: + """Contains data items for a survey organisation""" + + name: str + datatype: rdflib.URIRef + agent: rdflib.URIRef + + +@dataclasses.dataclass +class AttributeValue: + """Contains data items to enable producing attribute, value and collection nodes""" + + raw: str + attribute: rdflib.URIRef + value: rdflib.URIRef + collection: rdflib.URIRef + + +class SurveyMetadataMapper(base.mapper.ABISMapper): + """ABIS mapper for `survey_metadata.csv` v3""" + + def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> frictionless.Report: + """Applies Frictionless validation for the 'survey_metadata.csv' template + + Args: + data (base.types.ReadableType): Raw data to be validated + **kwargs (Any): Additional keyword arguments. + + Returns: + frictionless.Report: Validation report for the specified data. + """ + # Construct Schema + schema = self.extra_fields_schema( + data=data, + full_schema=True, + ) + + # Construct Resource + resource = frictionless.Resource( + source=data, + format="csv", # TODO -> Hardcoded to csv for now + schema=schema, + encoding="utf-8", + ) + + # Validate + report: frictionless.Report = resource.validate( + checklist=frictionless.Checklist( + checks=[ + # Enforces non-empty and maximum row count. 
+ frictionless.checks.table_dimensions(max_rows=1, min_rows=1), + # Extra Custom Checks + plugins.tabular.IsTabular(), + plugins.chronological.ChronologicalOrder( + field_names=[ + "surveyStart", + "surveyEnd", + ] + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=[ + "spatialCoverageWKT", + "geodeticDatum", + ] + ), + ], + ), + ) + + # Return validation report + return report + + def apply_mapping_row( + self, + *, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + extra_schema: frictionless.Schema, + base_iri: rdflib.Namespace, + **kwargs: Any, + ) -> None: + """Applies mapping for a row in the `survey_metadata.csv` template. + + Args: + row (frictionless.Row): Row to be processed in the dataset. + dataset (rdflib.URIRef): Dataset IRI this row is a part of. + graph (rdflib.URIRef): Graph to map row into. + extra_schema (frictionless.Schema): Schema of extra fields. + base_iri (rdflib.Namespace): Base IRI to use for mapping. + """ + # Set the row number to start from the data, excluding header + row_num = row.row_number - 1 + + # Create BDR project IRI + project = utils.rdf.uri(f"project/SSD-Survey-Project/{row_num}", base_iri) + + # Create TERN survey IRI from surveyID field + survey_id: str | None = row["surveyID"] + survey = utils.iri_patterns.survey_iri(base_iri, survey_id) + + # Create survey plan IRI + survey_plan = utils.iri_patterns.plan_iri( + base_iri, + "survey", + (survey_id or str(row_num)), # fallback to row number when surveyID not available. 
+ ) + + # Conditionally create survey type attribute, value and collection IRIs + row_survey_type: str | None = row["surveyType"] + if row_survey_type: + survey_type_attribute = utils.iri_patterns.attribute_iri(base_iri, "surveyType", row_survey_type) + survey_type_value = utils.iri_patterns.attribute_value_iri(base_iri, "surveyType", row_survey_type) + survey_type_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "surveyType", row_survey_type + ) + else: + survey_type_attribute = None + survey_type_value = None + survey_type_collection = None + + # Create target habitat scope attribute and value objects + target_habitat_objects: list[AttributeValue] = [] + if target_habitats := row["targetHabitatScope"]: + for target_habitat in target_habitats: + target_habitat_objects.append( + AttributeValue( + raw=target_habitat, + attribute=utils.iri_patterns.attribute_iri(base_iri, "targetHabitatScope", target_habitat), + value=utils.iri_patterns.attribute_value_iri(base_iri, "targetHabitatScope", target_habitat), + collection=utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "targetHabitatScope", target_habitat + ), + ), + ) + + # Create target taxonomic scope attribute and value IRIs (list input) + target_taxonomic_objects: list[AttributeValue] = [] + if target_taxa := row["targetTaxonomicScope"]: + for target_taxon in target_taxa: + target_taxonomic_objects.append( + AttributeValue( + raw=target_taxon, + attribute=utils.iri_patterns.attribute_iri(base_iri, "targetTaxonomicScope", target_taxon), + value=utils.iri_patterns.attribute_value_iri(base_iri, "targetTaxonomicScope", target_taxon), + collection=utils.iri_patterns.attribute_collection_iri( + base_iri, "Survey", "targetTaxonomicScope", target_taxon + ), + ) + ) + + # Create survey orgs iris + survey_org_objects: list[SurveyIDDatatype] = [] + if survey_orgs := row["surveyOrgs"]: + for raw_org in survey_orgs: + survey_org_objects.append( + SurveyIDDatatype( + name=raw_org, 
+ datatype=utils.iri_patterns.datatype_iri("surveyID", raw_org), + agent=utils.iri_patterns.agent_iri(raw_org), + ) + ) + + # Add BDR project + self.add_project( + uri=project, + survey=survey, + dataset=dataset, + graph=graph, + row=row, + ) + + # Add BDR survey + self.add_survey( + uri=survey, + survey_plan=survey_plan, + survey_org_objects=survey_org_objects, + row=row, + graph=graph, + ) + + # Attach temporal coverage if present + self.add_temporal_coverage( + uri=survey, + row=row, + graph=graph, + ) + + # Add spatial coverage values + self.add_spatial_coverage( + uri=survey, + row=row, + graph=graph, + ) + + for so_obj in survey_org_objects: + # Add survey ID source datatype nodes + self.add_survey_id_source_datatypes( + uri=so_obj.datatype, + agent=so_obj.agent, + graph=graph, + ) + + # Add agent + self.add_agent( + uri=so_obj.agent, + name=so_obj.name, + graph=graph, + ) + + # Add plan + self.add_plan( + uri=survey_plan, + row=row, + dataset=dataset, + graph=graph, + ) + + # Add survey type attribute node + self.add_survey_type_attribute( + uri=survey_type_attribute, + survey_type_value=survey_type_value, + row_survey_type=row_survey_type, + dataset=dataset, + graph=graph, + ) + + # Add survey type value node + self.add_survey_type_value( + uri=survey_type_value, + row_survey_type=row_survey_type, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add survey type collection node + self.add_survey_type_collection( + uri=survey_type_collection, + row_survey_type=row_survey_type, + survey_type_attribute=survey_type_attribute, + survey_plan=survey_plan, + dataset=dataset, + graph=graph, + ) + + # Iterate through target habitat objects + for th_obj in target_habitat_objects: + # Add target habitat scope attribute node + self.add_target_habitat_attribute( + uri=th_obj.attribute, + dataset=dataset, + target_habitat_value=th_obj.value, + raw_value=th_obj.raw, + graph=graph, + ) + + # Add target habitat scope value node + self.add_target_habitat_value( 
+ uri=th_obj.value, + dataset=dataset, + raw_value=th_obj.raw, + graph=graph, + base_iri=base_iri, + ) + + # Add target habitat scope collection + self.add_target_habitat_collection( + uri=th_obj.collection, + raw_value=th_obj.raw, + target_habitat_attribute=th_obj.attribute, + survey_plan=survey_plan, + dataset=dataset, + graph=graph, + ) + + # Iterate through target taxonomic objects + for tt_obj in target_taxonomic_objects: + # Add target taxonomic scope attribute node + self.add_target_taxonomic_attribute( + uri=tt_obj.attribute, + dataset=dataset, + target_taxon_value=tt_obj.value, + raw_value=tt_obj.raw, + graph=graph, + ) + + # Add target taxonomic scope value node + self.add_target_taxonomic_value( + uri=tt_obj.value, + dataset=dataset, + raw_value=tt_obj.raw, + graph=graph, + base_iri=base_iri, + ) + + # Add target taxonomic scope collection node + self.add_target_taxonomic_scope_collection( + uri=tt_obj.collection, + raw_value=tt_obj.raw, + target_taxon_attribute=tt_obj.attribute, + survey_plan=survey_plan, + dataset=dataset, + graph=graph, + ) + + # Add extra columns JSON + self.add_extra_fields_json( + subject_uri=survey, + row=row, + graph=graph, + extra_schema=extra_schema, + ) + + def add_project( + self, + uri: rdflib.URIRef, + survey: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + row: frictionless.Row, + ) -> None: + """Adds the ABIS project to the graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + survey (rdflib.URIRef): BDR survey uri. + dataset (rdflib.URIRef): Dataset uri. + graph (rdflib.Graph): Graph to add to. + row (frictionless.Row): Row to be processed in dataset. + """ + # Extract relevant values from row + project_id = row["surveyID"] + project_name = row["surveyName"] + + # Add type and attach to dataset + graph.add((uri, a, utils.namespaces.ABIS.Project)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add (required) project name, id (not required) and purpose (not required). 
+ graph.add((uri, rdflib.SDO.name, rdflib.Literal(project_name))) + if project_id: + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(project_id))) + + # Attach survey + graph.add((uri, rdflib.SDO.hasPart, survey)) + + def add_survey( + self, + uri: rdflib.URIRef, + survey_plan: rdflib.URIRef, + survey_org_objects: list[SurveyIDDatatype], + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds the tern:Survey to the graph. + + Args: + uri (rdflib.URIRef): URI of the survey. + survey_plan (rdflib.URIRef): URI of survey plan + survey_org_objects (list[SurveyIDDatatype]): Data objects + describing the survey organisations + row (frictionless.Row): Data row provided in the data csv + graph (rdflib.Graph): The graph to be modified. + """ + # Add type and dataset + graph.add((uri, a, utils.namespaces.TERN.Survey)) + + # Add survey name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["surveyName"]))) + + # Add survey ID + if (survey_id := row["surveyID"]) is not None: + # Add survey id literals per organisation + for survey_org in survey_org_objects: + id_literal = rdflib.Literal(lexical_or_value=survey_id, datatype=survey_org.datatype) + graph.add((uri, rdflib.SDO.identifier, id_literal)) + + # Add survey id as type string if no organisation provided + if len(survey_org_objects) == 0: + id_literal = rdflib.Literal(survey_id) + graph.add((uri, rdflib.SDO.identifier, id_literal)) + + # Add taxonomic coverage + if taxonomic_coverage := row["targetTaxonomicScope"]: + for taxa in taxonomic_coverage: + graph.add((uri, utils.namespaces.BDR.target, rdflib.Literal(taxa))) + + # Add purpose + if purpose := row["surveyPurpose"]: + graph.add((uri, utils.namespaces.BDR.purpose, rdflib.Literal(purpose))) + + # Add plan + graph.add((uri, rdflib.PROV.hadPlan, survey_plan)) + + # Add keywords + if keywords := row["keywords"]: + for keyword in keywords: + graph.add((uri, rdflib.SDO.keywords, rdflib.Literal(keyword))) + + def add_spatial_coverage( + self, + uri: 
rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds the spatial coverage fields to the graph. + + Args: + uri (rdflib.URIRef): Base URI the spatial information will be attached + row (frictionless.Row): Data row provided in the data csv + graph (rdflib.Graph): Graph to be modified + """ + # Extract relevant values + datum = row["geodeticDatum"] + sc_geometry = row["spatialCoverageWKT"] + + if not (datum and sc_geometry): + return + + # Construct geometry + geometry = models.spatial.Geometry( + raw=sc_geometry, + datum=datum, + ) + + # Add spatial coverage + geometry_node = rdflib.BNode() + graph.add((uri, utils.namespaces.GEO.hasGeometry, geometry_node)) + graph.add((geometry_node, a, utils.namespaces.GEO.Geometry)) + graph.add((geometry_node, utils.namespaces.GEO.asWKT, geometry.to_transformed_crs_rdf_literal())) + + self.add_geometry_supplied_as( + subj=uri, + pred=utils.namespaces.GEO.hasGeometry, + obj=geometry_node, + geom=geometry, + graph=graph, + ) + + def add_temporal_coverage( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds the temporal coverage fields to the graph. 
+ + Args: + uri (rdflib.URIRef): Base URI the temporal information will be attached + row (frictionless.Row): Data row provided in the data csv + graph (rdflib.Graph): Graph to be modified + """ + # Determine if any dates are present in the row + start_date: models.temporal.Timestamp = row["surveyStart"] + end_date: models.temporal.Timestamp = row["surveyEnd"] + if not start_date and not end_date: + return + + # Create temporal coverage node + temporal_coverage = rdflib.BNode() + graph.add((temporal_coverage, a, rdflib.TIME.TemporalEntity)) + if start_date: + begin = rdflib.BNode() + graph.add((temporal_coverage, rdflib.TIME.hasBeginning, begin)) + graph.add((begin, a, rdflib.TIME.Instant)) + graph.add((begin, start_date.rdf_in_xsd, start_date.to_rdf_literal())) + if end_date: + end = rdflib.BNode() + graph.add((temporal_coverage, rdflib.TIME.hasEnd, end)) + graph.add((end, a, rdflib.TIME.Instant)) + graph.add((end, end_date.rdf_in_xsd, end_date.to_rdf_literal())) + + # Attach to survey node + graph.add((uri, rdflib.TIME.hasTime, temporal_coverage)) + + def add_survey_id_source_datatypes( + self, + uri: rdflib.URIRef, + agent: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds the source datatype nodes to graph. + + Args: + uri (rdflib.URIRef): The reference uri. + agent (rdflib.URIRef): Agent uri. + graph (rdflib.Graph): Graph to be modified. + """ + # Add type + graph.add((uri, a, rdflib.RDFS.Datatype)) + + # Add label + graph.add((uri, rdflib.SKOS.prefLabel, rdflib.Literal("surveyID source"))) + + # Add attribution + attribution = rdflib.BNode() + graph.add((attribution, a, rdflib.PROV.Attribution)) + graph.add((attribution, rdflib.PROV.agent, agent)) + graph.add((attribution, rdflib.PROV.hadRole, PRINCIPAL_INVESTIGATOR)) + graph.add((uri, rdflib.PROV.qualifiedAttribution, attribution)) + + def add_agent( + self, + uri: rdflib.URIRef, + name: str, + graph: rdflib.Graph, + ) -> None: + """Adds agent to graph. 
+ + Args: + uri (rdflib.URIRef): Agent reference + name (str): Original name supplied + graph (rdflib.Graph): Graph to be modified + """ + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + + # Add name + graph.add((uri, rdflib.SDO.name, utils.rdf.uri_or_string_literal(name))) + + def add_plan( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds plan to graph. + + Args: + uri: Plan reference. + row: Raw data row. + dataset: URI for the dataset node. + graph: Graph to be modified. + """ + # Add type + graph.add((uri, a, rdflib.PROV.Plan)) + + # add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add citation(s) + if citations := row["surveyMethodCitation"]: + for citation in citations: + graph.add((uri, rdflib.SDO.citation, rdflib.Literal(citation))) + + # Add description + if description := row["surveyMethodDescription"]: + graph.add((uri, rdflib.SDO.description, rdflib.Literal(description))) + + # Add method url(s) + if method_urls := row["surveyMethodURL"]: + for method_url in method_urls: + graph.add((uri, rdflib.SDO.url, rdflib.Literal(method_url, datatype=rdflib.XSD.anyURI))) + + def add_survey_type_attribute( + self, + uri: rdflib.URIRef | None, + survey_type_value: rdflib.URIRef | None, + row_survey_type: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds survey type attribute node. + + Args: + uri: Attribute node for survey type + survey_type_value: Value node for Survey type + row_survey_type: Raw value from the template for surveyType + dataset: Dataset the data belongs. + graph: Graph to be modified. 
+ """ + # Non default field, return if not present + if uri is None: + return + + # Add type + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + + # Add dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add attribute + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_SURVEY_TYPE)) + + # Add value + if row_survey_type: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(row_survey_type))) + if survey_type_value: + graph.add((uri, utils.namespaces.TERN.hasValue, survey_type_value)) + + def add_survey_type_value( + self, + uri: rdflib.URIRef | None, + row_survey_type: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds the survey type value node to graph. + + Args: + uri: Survey type value iri. + row_survey_type: Raw value from the template for surveyType + dataset: Dataset raw data belongs. + graph: Graph to be modified. + base_iri: Namespace used to construct IRIs + """ + # Return no value IRI + if uri is None: + return + + # Add type + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + if row_survey_type: + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(row_survey_type))) + + # Retrieve vocab for field + vocab = self.fields()["surveyType"].get_flexible_vocab() + + # Add value + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row_survey_type) + graph.add((uri, rdflib.RDF.value, term)) + + def add_survey_type_collection( + self, + *, + uri: rdflib.URIRef | None, + row_survey_type: str | None, + survey_type_attribute: rdflib.URIRef | None, + survey_plan: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a survey type Collection to the graph + + Args: + uri: The uri for the Collection. + row_survey_type: surveyType value from template. + survey_type_attribute: The uri for the attribute node. 
+ survey_plan: The uri for the Survey Plan node that wil be a member of the Collection. + dataset: The uri for the dateset node. + graph: The graph. + """ + # Check if collection node should be created + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if row_survey_type: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Survey Collection - Survey Type - {row_survey_type}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # Add link to attribute + if survey_type_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, survey_type_attribute)) + # add link to the Survey Plan node + graph.add((uri, rdflib.SDO.member, survey_plan)) + + def add_target_habitat_attribute( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + target_habitat_value: rdflib.URIRef, + raw_value: str, + graph: rdflib.Graph, + ) -> None: + """Adds the target habitat scope attribute node. + + Args: + uri (rdflib.URIRef): Subject of the node. + dataset (rdflib.URIRef): Dataset raw data belongs. + target_habitat_value (rdflib.URIRef): Corresponding value. + raw_value (str): Raw data. + graph (rdflib.Graph): Graph to be modified. + """ + # Add type + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + + # Add dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add attribute concept + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_TARGET_HABITAT_SCOPE)) + + # Add value + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(raw_value))) + graph.add((uri, utils.namespaces.TERN.hasValue, target_habitat_value)) + + def add_target_habitat_value( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + raw_value: str, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Add the target habitat scope value node. + + Args: + uri (rdflib.URIRef): Subject of the node. + dataset (rdflib.URIRef): Dataset raw data belongs. 
+ raw_value (str): Raw data. + graph (rdflib.Graph): Graph to be modified. + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Add types + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(raw_value))) + + # Retrieve vocab for field + vocab = self.fields()["targetHabitatScope"].get_flexible_vocab() + + # Add value + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(raw_value) + graph.add((uri, rdflib.RDF.value, term)) + + def add_target_habitat_collection( + self, + *, + uri: rdflib.URIRef, + raw_value: str, + target_habitat_attribute: rdflib.URIRef, + survey_plan: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a target habitat Collection to the graph + + Args: + uri: The uri for the Collection. + raw_value: targetTaxonomicScope value from template. + target_habitat_attribute: The uri for the attribute node. + survey_plan: The uri for the Survey Plan node that wil be a member of the Collection. + dataset: The uri for the dateset node. + graph: The graph. + """ + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Survey Collection - Target Habitat Scope - {raw_value}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # Add link to attribute + graph.add((uri, utils.namespaces.TERN.hasAttribute, target_habitat_attribute)) + # add link to the Survey Plan node + graph.add((uri, rdflib.SDO.member, survey_plan)) + + def add_target_taxonomic_attribute( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + target_taxon_value: rdflib.URIRef, + raw_value: str, + graph: rdflib.Graph, + ) -> None: + """Add the target taxonomic scope node. + + Args: + uri (rdflib.URIRef): Subject of the node. + dataset (rdflib.URIRef): Dataset raw data belongs. 
+ target_taxon_value (rdflib.URIRef): Corresponding + value node. + raw_value (str): Raw data provided. + graph (rdflib.Graph): Graph to be modified. + """ + # Add type + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + + # Add dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add attribute concept + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_TARGET_TAXONOMIC_SCOPE)) + + # Add values + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(raw_value))) + graph.add((uri, utils.namespaces.TERN.hasValue, target_taxon_value)) + + def add_target_taxonomic_value( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + raw_value: str, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds the target toxonomic scope value node. + + Args: + uri (rdflib.URIRef): Subject of the node. + dataset (rdflib.URIRef): Dataset raw data belongs. + raw_value (str): Raw data provided. + graph (rdflib.Graph): Graph to be modified. + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Add types + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(raw_value))) + + # Retrieve vocab for field + vocab = self.fields()["targetTaxonomicScope"].get_flexible_vocab() + + # Add value + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(raw_value) + graph.add((uri, rdflib.RDF.value, term)) + + def add_target_taxonomic_scope_collection( + self, + *, + uri: rdflib.URIRef, + raw_value: str, + target_taxon_attribute: rdflib.URIRef, + survey_plan: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a target taxonomic scope Collection to the graph + + Args: + uri: The uri for the Collection. + raw_value: targetTaxonomicScope value from template. + target_taxon_attribute: The uri for the attribute node. 
+ survey_plan: The uri for the Survey Plan node that wil be a member of the Collection. + dataset: The uri for the dateset node. + graph: The graph. + """ + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Survey Collection - Target Taxonomic Scope - {raw_value}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # Add link to attribute + graph.add((uri, utils.namespaces.TERN.hasAttribute, target_taxon_attribute)) + # add link to the Survey Plan node + graph.add((uri, rdflib.SDO.member, survey_plan)) + + +# Register Mapper +base.mapper.register_mapper(SurveyMetadataMapper) diff --git a/abis_mapping/templates/survey_metadata_v3/metadata.json b/abis_mapping/templates/survey_metadata_v3/metadata.json new file mode 100644 index 00000000..83656bb1 --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/metadata.json @@ -0,0 +1,13 @@ +{ + "name": "survey_metadata", + "label": "Systematic Survey Metadata Template", + "version": "3.0.0", + "description": "A template for systematic survey metadata", + "biodiversity_type": "Systematic Survey Metadata", + "spatial_type": "Point, line, polygon", + "file_type": "CSV", + "sampling_type": "systematic survey", + "template_url": "https://raw.githubusercontent.com/gaiaresources/abis-mapping/main/abis_mapping/templates/survey_metadata_v3/survey_metadata.csv", + "schema_url": "https://raw.githubusercontent.com/gaiaresources/abis-mapping/main/abis_mapping/templates/survey_metadata_v3/schema.json", + "template_lifecycle_status": "beta" +} diff --git a/abis_mapping/templates/survey_metadata_v3/schema.json b/abis_mapping/templates/survey_metadata_v3/schema.json new file mode 100644 index 00000000..ae4682fb --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/schema.json @@ -0,0 +1,194 @@ +{ + "fields": [ + { + "name": "surveyID", + "title": "Survey ID", + "description": "The identifier for the survey. 
Important if more there is more than one survey a the project.", + "example": "COL1", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "surveyName", + "title": "Survey Name", + "description": "Brief title for the survey.", + "type": "string", + "format": "default", + "example": "Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits - Summer", + "constraints": { + "required": true + } + }, + { + "name": "surveyPurpose", + "title": "Survey Purpose", + "description": "A description of the survey objective", + "type": "string", + "format": "default", + "example": "Summer sampling for peak insect diversity.", + "constraints": { + "required": false + } + }, + { + "name": "surveyType", + "title": "Survey Type", + "description": "Description of type of survey conducted", + "example": "Wet pitfall trapping", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "SURVEY_TYPE" + ] + }, + { + "name": "surveyStart", + "title": "Survey Start", + "description": "The date data collection commenced.", + "example": "21/09/2020", + "type": "timestamp", + "format": "default", + "constraints": { + "required": true + } + }, + { + "name": "surveyEnd", + "title": "Survey End", + "description": "The date data collection was completed.", + "example": "23/09/2020", + "type": "timestamp", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "targetTaxonomicScope", + "title": "Target Taxonomic Scope", + "description": "The range of biological taxa covered by the survey. 
Multiple terms are allowed, separated by a vertical bar aka pipe |", + "example": "Coleoptera | Formicidae", + "type": "list", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "TARGET_TAXONOMIC_SCOPE" + ] + }, + { + "name": "targetHabitatScope", + "title": "Target Habitat Scope", + "description": "The habitats targeted for sampling during the survey. Multiple terms are allowed, separated by a vertical bar aka pipe |", + "example": "Woodland", + "type": "list", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "TARGET_HABITAT_SCOPE" + ] + }, + { + "name": "spatialCoverageWKT", + "title": "Spatial Coverage (WKT)", + "description": "Well Known Text (WKT) expression of the geographic coordinates that describe the survey's spatial extent. Ensure the coordinates are arranged in 'longitude latitude' order and do not include the CRS in the WKT expression (it comes from the geodeticDatum field).", + "example": "POLYGON ((146.363 -33.826, 148.499 -33.826, 148.499 -34.411, 146.363 -33.826))", + "type": "wkt", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "geodeticDatum", + "title": "Spatial Coverage (Geodetic Datum)", + "description": "The geodetic datum upon which the geographic coordinates in the Spatial coverage (WKT) are based.", + "example": "GDA2020", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:geodeticDatum", + "constraints": { + "required": false, + "enum": [ + "AGD66", + "EPSG:4202", + "AGD84", + "EPSG:4203", + "GDA2020", + "EPSG:7844", + "GDA94", + "EPSG:4283", + "WGS84", + "EPSG:4326" + ] + }, + "vocabularies": [ + "GEODETIC_DATUM" + ] + }, + { + "name": "surveyOrgs", + "title": "Survey Orgs", + "description": "Name of organisations or individuals for whom Survey is being conducted. 
Multiple terms are allowed, separated by a vertical bar aka pipe |", + "example": "NSW Department of Planning, Industry and Environment | CSIRO", + "type": "list", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "surveyMethodCitation", + "title": "Survey Method Citation", + "description": "A citation or reference to the survey methods used.", + "example": "Ng, K., Barton, P.S., Blanchard, W. et al. Disentangling the effects of farmland use, habitat edges, and vegetation structure on ground beetle morphological traits. Oecologia 188, 645–657 (2018). https://doi.org/10.1007/s00442-018-4180-9", + "type": "list", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "surveyMethodDescription", + "title": "Survey Method Description", + "description": "Free text description of the survey method used.", + "example": "Our experimental design consisted of four 400 m transects running from inside each woodland patch out into four adjoining farmland uses (crop, rested, woody debris application, revegetation plantings). To quantify potential edge efects on beetle species traits, we sampled beetles at five locations along each transect: 200 and 20 m inside woodlands, 200 and 20 m inside farmlands, and at the woodland–farmland edge (0 m). Each sampling location comprised a pair of wet invertebrate pitfall traps. separated by a drift fence (60 cm long x 10 cm high) to help direct arthropods into traps. We opened a total of 220 pairs of traps for 14 days during spring (Oct–Nov 2014), and repeated sampling during summer (January–February 2015). 
Beetle samples from each pitfall trap pair, and across the two time periods, were pooled to provide one sample per sampling location.", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "surveyMethodURL", + "title": "Survey Method URL", + "description": "A DOI or link to the reference about the survey method, if available.", + "example": "https://biocollect.ala.org.au/document/download/2022-01/202201%20CBR%20Flora%20and%20Vegetation%20report_draftv1.pdf | https://doi.org/10.1002/9781118945568.ch11", + "type": "list", + "format": "uri", + "constraints": { + "required": false + } + }, + { + "name": "keywords", + "title": "Keywords", + "description": "Terms, phrases or descriptors that highlight the key attributes of the study. Multiple terms are allowed, separated by a vertical bar aka pipe |", + "example": "ground beetle | habitat | morphology | traits | farmland | woodland | remnant vegetation | split-plot study", + "type": "list", + "format": "default", + "constraints": { + "required": false + } + } + ] +} diff --git a/abis_mapping/templates/survey_metadata_v3/survey_metadata.csv b/abis_mapping/templates/survey_metadata_v3/survey_metadata.csv new file mode 100644 index 00000000..89fc5ae9 --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/survey_metadata.csv @@ -0,0 +1 @@ +surveyID,surveyName,surveyPurpose,surveyType,surveyStart,surveyEnd,targetTaxonomicScope,targetHabitatScope,spatialCoverageWKT,geodeticDatum,surveyOrgs,surveyMethodCitation,surveyMethodDescription,surveyMethodURL,keywords diff --git a/abis_mapping/templates/survey_metadata_v3/templates/instructions.md b/abis_mapping/templates/survey_metadata_v3/templates/instructions.md new file mode 100644 index 00000000..faa7b354 --- /dev/null +++ b/abis_mapping/templates/survey_metadata_v3/templates/instructions.md @@ -0,0 +1,181 @@ +{% extends "BASE_TEMPLATE base.md" %} +{% block body %} +# SYSTEMATIC SURVEY METADATA TEMPLATE INSTRUCTIONS + +## 
Intended Usage +This Systematic Survey Metadata template should be used to record metadata relating to a Systematic Survey dataset. + +The Systematic Survey Metadata template **must be used in combination** with the +Systematic Survey Occurrence template and, in some cases, the Systematic Survey Site template +with or without the Systematic Survey Site Visit template. + +Templates have been provided to facilitate integration of your data into the Biodiversity +Data Repository database. Not all types of data have been catered for in the available +templates at this stage; therefore, if you are unable to find a suitable template, please +contact to make us aware of your data needs. + +## Data Validation Requirements: +For data validation, you will need your data file to: + +- be the correct **file format**, +- have **fields that match the template downloaded** (do not remove, or + change the order of fields), +- have extant values for **mandatory fields** (see Table 1), and +- comply with all **data value constraints**; for example the geographic coordinates are + consistent with a [geodeticDatum](#geodeticDatum-vocabularies) type of the ***{{values.geodetic_datum_count}}*** available + options. +- only **one row of metadata** should be included and only the first row of metadata will be accepted + (this symbolises one Survey per dataset submission). + +Additional fields may be added **after the templated fields** (noting that the data type +is not assumed and values will be encoded as strings). + +### FILE FORMAT +- The systematic survey metadata template is a [UTF-8](#appendix-iv-utf-8) encoded csv (not Microsoft +Excel Spreadsheets). Be sure to save this file with your data as a .csv (UTF-8) as +follows, otherwise it will not pass the in-browser csv validation step upon upload. +
`[MS Excel: Save As > More options > Tools > Web options > Save this document as > +Unicode (UTF-8)]` +- **Do not include empty rows.** + +### FILE NAME + +When making a manual submission to the Biodiversity Data Repository, +the file name must include the version number +of this biodiversity data template (`v{{ metadata.version }}`). +The following format is an example of a valid file name: + +`data_descripion-v{{ metadata.version }}-additional_description.csv` + +where: + +* `data_description`: A short description of the data (e.g. `survey_meta`, `test_data`). +* `v{{ metadata.version }}`: The version number of this template. +* `additional_description`: (Optional) Additional description of the data, if needed (e.g. `test_data`). +* `.csv`: Ensure the file name ends with `.csv`. + +For example, `survey_meta-v{{ metadata.version }}-test_data.csv` or `test_data-v{{ metadata.version }}.csv` + +### FILE SIZE +MS Excel imposes a limit of 1,048,576 rows on a spreadsheet, limiting a CSV file to the +header row followed by 1,048,575 occurrences. Furthermore, MS Excel has a 32,767 character +limit on individual cells in a spreadsheet. These limits may be overcome by using or +editing CSV files with other software. + +Larger datasets may be more readily ingested using the API interface. Please contact + to make us aware of your data needs. + +## TEMPLATE FIELDS +The template file contains the field names in the top row that form part of the core Survey +data model. Table 1 will assist you in transferring your data to the template with the following +information: + +- **Field name** in the template (and an external link to the Darwin Core standard for that +field where available); +- **Description** of the field; +- **Required** i.e. whether the field is **mandatory, +conditionally mandatory, or optional**; +- **Datatype format** required for the data values for example text (string), number +(integer, float), or date; and +- **Example/s** of an entry for that field. 
+- **Vocabulary links** within this document (for example pick list values) where relevant. +The fields that have suggested values options for the fields in Table 1 are listed in +Table 2 in alphabetical order of field name. + +### ADDITIONAL FIELDS +Data that do not match the existing template fields may be added as additional columns in +the CSV files after the templated fields. +For example, `sampleSizeUnit`, `sampleSizeValue`. + +Table 1: Systematic Survey Metadata template fields with descriptions, conditions, +datatype format, and examples. + +{{tables.fields}} + +## CHANGELOG + +No changes from Systematic Survey Metadata Template v2.0.0 + +## APPENDICES +### APPENDIX-I: Vocabulary List +With the exception of `geodeticDatum`, data validation +does not require adherence to the vocabularies for the various vocabularied fields.. These vocabularies are provided as a +means of assistance in developing consistent language within the database. New terms can be added +to more appropriately describe your data that goes beyond the current list. + +Table 2 provides some +suggested values from existing sources such as: [Biodiversity Information Standard (TDWG)](https://dwc.tdwg.org/), +[EPSG.io Coordinate systems worldwide](https://epsg.io/), the [Global Biodiversity Information +System](https://rs.gbif.org/), and [Open Nomenclature in the biodiversity +era](https://doi.org/10.1111/2041-210X.12594). + +Table 2: Suggested values for the controlled vocabulary fields in the template. Each term has +a preferred label with a definition to aid understanding of its meaning. For some terms, alternative +labels with similar semantics are provided.
Note: the value for `geodeticDatum` +must come from one of five options in this table.
+ +{{tables.vocabularies}} + +### APPENDIX-II: Well Known Text (WKT) +For general information on how WKT coordinate reference data is formatted is available [here](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry). +The length of a WKT string or of its components is not prescribed; however, MS Excel *does* has a +32,767 (32K) character limit on individual cells in a spreadsheet. + +It is possible to edit CSV files outside of Excel in order to include more than 32K characters. + +**Note:** Ensure the coordinates are arranged in `longitude latitude` order and do not include the CRS in the WKT expression (it comes from the geodeticDatum field). + +![Multipart geometries (2D) WKT](assets/multipart_geometries_2d_wkt.png) +
*Source: Mwtoews - CC BY-SA 3.0 - Wikipedia *
+ +### APPENDIX-III: Timestamp +Following date and date-time formats are acceptable within the timestamp: + +| TYPE | FORMAT | +| --- |-------------------------------------------------------------------------------------------------------------------------------------| +| **xsd:dateTimeStamp with timezone** | yyyy-mm-ddThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00) OR
yyyy-mm-ddThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00) OR
yyyy-mm-ddThh:mmTZD (eg 1997-07-16T19:20+01:00)| +| **xsd:dateTime** | yyyy-mm-ddThh:mm:ss.s (eg 1997-07-16T19:20:30.45) OR
yyyy-mm-ddThh:mm:ss (eg 1997-07-16T19:20:30) OR
yyyy-mm-ddThh:mm (eg 1997-07-16T19:20) | +| **xsd:Date** | dd/mm/yyyy OR
d/m/yyyy OR
yyyy-mm-dd OR
yyyy-m-d | +| **xsd:gYearMonth** | mm/yyyy OR
m/yyyy OR
yyyy-mm | +| **xsd:gYear** | yyyy | + +Where:
+  `yyyy`: four-digit year
+  `mm`: two-digit month (01=January, etc.)
+  `dd`: two-digit day of month (01 through 31)
+  `hh`: two digits of hour (00 through 23) (am/pm NOT allowed)
+  `mm`: two digits of minute (00 through 59)
+  `ss`: two digits of second (00 through 59)
+  `s`: one or more digits representing a decimal fraction of a second
+  `TZD`: time zone designator (Z or +hh:mm or -hh:mm)
+ +### APPENDIX-IV: UTF-8 +UTF-8 encoding is considered a best practice for handling character encoding, especially in +the context of web development, data exchange, and modern software systems. UTF-8 +(Unicode Transformation Format, 8-bit) is a variable-width character encoding capable of +encoding all possible characters (code points) in Unicode.
+Here are some reasons why UTF-8 is recommended: +- **Universal Character Support:** UTF-8 can represent almost all characters from all writing + systems in use today. This includes characters from various languages, mathematical symbols, + and other special characters. +- **Backward Compatibility:** UTF-8 is backward compatible with ASCII (American + Standard Code for Information Interchange). The first 128 characters in UTF-8 are + identical to ASCII, making it easy to work with systems that use ASCII. +- **Efficiency:** UTF-8 is space-efficient for Latin-script characters (common in English + and many other languages). It uses one byte for ASCII characters and up to four + bytes for other characters. This variable-length encoding minimises storage and + bandwidth requirements. +- **Web Standards:** UTF-8 is the dominant character encoding for web content. It is + widely supported by browsers, servers, and web-related technologies. +- **Globalisation:** As software applications become more globalised, supporting a wide + range of languages and scripts becomes crucial. UTF-8 is well-suited for + internationalisation and multilingual support. +- **Compatibility with Modern Systems:** UTF-8 is the default encoding for many + programming languages, databases, and operating systems. Choosing UTF-8 helps + ensure compatibility across different platforms and technologies. + +When working with text data, UTF-8 encoding is recommended to avoid issues related to character +representation and ensure that a diverse set of characters and languages is supported. 
+ +For assistance, please contact: +{% endblock %} diff --git a/abis_mapping/templates/survey_occurrence_data_v3/README.md b/abis_mapping/templates/survey_occurrence_data_v3/README.md new file mode 100644 index 00000000..d2595b7c --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/README.md @@ -0,0 +1,5 @@ +# Template Description +A template to translate some Darwin Core fields + +# Template Instructions +See `instructions.pdf` for more details diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv new file mode 100644 index 00000000..49ddf57d --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv @@ -0,0 +1,17 @@ +providerRecordID,providerRecordIDSource,locality,decimalLatitude,decimalLongitude,geodeticDatum,coordinateUncertaintyInMeters,dataGeneralizations,eventDateStart,eventDateEnd,samplingProtocol,basisOfRecord,recordedBy,recordNumber,occurrenceStatus,habitat,establishmentMeans,organismRemarks,individualCount,organismQuantity,organismQuantityType,lifeStage,sex,reproductiveCondition,ownerRecordID,ownerRecordIDSource,collectionCode,catalogNumber,catalogNumberSource,otherCatalogNumbers,otherCatalogNumbersSource,preparations,preparedDate,associatedSequences,sequencingMethod,verbatimIdentification,dateIdentified,identifiedBy,identificationMethod,scientificName,identificationQualifier,identificationRemarks,acceptedNameUsage,kingdom,taxonRank,threatStatus,conservationAuthority,threatStatusCheckProtocol,threatStatusDateDetermined,threatStatusDeterminedBy,sensitivityCategory,sensitivityAuthority,surveyID,siteID,siteVisitID +1,WAM,Cowaramup Bay Road,-33.8,115.21,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Calothamnus lateralis var. 
crassus,,Stream Environment and Water Pty Ltd,,Calothamnus lateralis var. crassus,,,,Plantae,,,,,,,,,,, +2,WAM,Cowaramup Bay Road,-33.86,115.01,WGS84,,,26/09/2019,,,,,PE:12:8831,,,,,,,,,,,,,,,,,,,,,,Boronia anceps,,Stream Environment and Water Pty Ltd,,Boronia anceps,,,,Plantae,,,,,,,,,,, +3,WAM,Cowaramup Bay Road,-33.86,115.01,WGS84,,,26/09/2019,,,,Test Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Boronia anceps,,Stream Environment and Water Pty Ltd,,Boronia anceps,,,,Plantae,,,,,,,,,,, +4,WAM,Cowaramup Bay Road,-33.86,115.01,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Boronia anceps,,Stream Environment and Water Pty Ltd,,Boronia anceps,,,,Plantae,,,,,,,,,,, +5,WAM,Cowaramup Bay Road,-33.86,114.99,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Banksia sessilis var. cordata,,Stream Environment and Water Pty Ltd,,Banksia sessilis var. cordata,,,,Plantae,,,,,,,,,,, +6,WAM,Cowaramup Bay Road,-33.86,114.99,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Banksia sessilis var. cordata,,Stream Environment and Water Pty Ltd,,Banksia sessilis var. cordata,,,,Plantae,,,,,,,,,,, +7,WAM,Cowaramup Bay Road,-33.86,114.99,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Banksia sessilis var. cordata,,Stream Environment and Water Pty Ltd,,Banksia sessilis var. cordata,,,,Plantae,,,,,,,,,,, +8,WAM,Cowaramup Bay Road,-33.86,114.99,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Banksia sessilis var. cordata,,Stream Environment and Water Pty Ltd,,Banksia sessilis var. 
cordata,,,,Plantae,,,,,,,,,,, +9,WAM,Cowaramup Bay Road,-33.86,115.02,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Caladenia excelsa,,Stream Environment and Water Pty Ltd,,Caladenia excelsa,,,,Plantae,,,,,,,,,,, +10,WAM,Cowaramup Bay Road,-33.86,115.02,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Caladenia excelsa,,Stream Environment and Water Pty Ltd,,Caladenia excelsa,,,,Plantae,,,,,,,,,,, +11,WAM,Cowaramup Bay Road,-33.86,115.02,WGS84,,,26/09/2019,,,,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,,,,,,,,,,Caladenia ?excelsa,,Stream Environment and Water Pty Ltd,,Caladenia excelsa,?,One unopened flower when recorded and one leaf only. ID not confirmed,,Plantae,,,,,,,,,,, +12,WAM,,-33.8,115.21,WGS84,,,26/09/2019,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Caladenia excelsa,,,,Plantae,,,,,,,,,,, +13,WAM,Cowaramup Bay Road,-33.86,115.02,WGS84,,,26/09/2019,,,PreservedSpecimen,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,C01,CC123,WAM,,,,,,,,,Stream Environment and Water Pty Ltd,,Caladenia excelsa,,,,Plantae,,,,,,,,,,, +14,WAM,Cowaramup Bay Road,-33.86,115.02,WGS84,20,Coordinates rounded to the nearest 10 km for conservation concern,26/09/2019,,,HumanObservation,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,C01,CC456,WAM,,,,,,,Caladenia ?excelsa,,Stream Environment and Water Pty Ltd,,Caladenia excelsa,?,Could not confirm the ID due to damaged flower,,Plantae,,,,,,,,,,, +8022FSJMJ079c5cf,WAM,Cowaramup Bay Road,-33.8,115.21,WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern,26/09/2019,,human observation,PreservedSpecimen,Stream Environment and Water Pty Ltd,PE:12:8832,present,"Closed forest of Melaleuca lanceolata. 
White, grey or brown sand, sandy loam.",native,Dried out leaf tips,2,,,adult,male,No breeding evident,MR-456,Stream Environment and Water Pty Ltd,32237,ARACH,WAM,BHP2012-7521 | M12378,BHP,Wet (in ethanol or some other preservative),26/09/2019,https://www.ncbi.nlm.nih.gov/nuccore/MH040669.1 | https://www.ncbi.nlm.nih.gov/nuccore/MH040616.1,Sanger dideoxy sequencing,Caladenia ?excelsa,2019-09-27T12:34+08:00,Stream Environment and Water Pty Ltd,Visually identified in the field (sighting),Caladenia excelsa,species incerta,no flowers present,Caladenia excelsa Hopper & A.P.Br.,Plantae,species,VU,WA,Check against Threatened and Priority Fauna List WA available from https://www.dpaw.wa.gov.au/plants-and-animals/threatened-species-and-communities/threatened-animals. Last updated 13 June 2022,,WA-BIO,Category 1,Department of Biodiversity and Conservation,MR-R1,MR-S1,MR-R1-V1 +ABC123,WAM,Cowaramup Bay Road,-33.8,115.21,WGS84,30,Coordinates generalised,26/09/2019,,new sampling protocol,new basis of record,Stream Environment and Water Pty Ltd,PE:12:8833,new occurrence status,new habitat,new establishment means,Leaves brown,6,,,new life stage,new sex,new reproductiveCondition,MR-457,Stream Environment and Water Pty Ltd,32238,ARACH,WAM,BHP2012-7522 | M12379,BHP,new preparations,27/09/2019,https://www.ncbi.nlm.nih.gov/nuccore/MH040669.1 | https://www.ncbi.nlm.nih.gov/nuccore/MH040616.1,new sequencing method,Caladenia ?excelsa,2019-09-27T12:34+08:00,Stream Environment and Water Pty Ltd,new identification method,Caladenia excelsa,new identification qualifier,new remarks,Caladenia excelsa Hopper & A.P.Br.,new kingdom,new taxon rank,new threat status,WA,a random selection,,,Category 1,Department of Biodiversity and Conservation,MR-R1,MR-S1, diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl new file mode 100644 
index 00000000..7ff9007b --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl @@ -0,0 +1,1991 @@ +@prefix abis: . +@prefix dwc: . +@prefix geo: . +@prefix prov: . +@prefix rdf: . +@prefix rdfs: . +@prefix schema: . +@prefix skos: . +@prefix sosa: . +@prefix tern: . +@prefix time: . +@prefix xsd: . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Basis Of Record - HumanObservation" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member , + ; + schema:name "Occurrence Collection - Basis Of Record - PreservedSpecimen" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Basis Of Record - new basis of record" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member , + ; + schema:name "Occurrence Collection - Conservation Authority - WA" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Data Generalizations - Coordinates generalised" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member , + ; + schema:name "Occurrence Collection - Data Generalizations - Coordinates rounded to the nearest 10 km for conservation concern" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Habitat - Closed forest of Melaleuca lanceolata. White, grey or brown sand, sandy loam." ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Habitat - new habitat" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member , + ; + schema:name "Occurrence Collection - Identification Qualifier - ?" ; + tern:hasAttribute . 
+ + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Identification Qualifier - new identification qualifier" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Identification Qualifier - species incerta" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Identification Remarks - Could not confirm the ID due to damaged flower" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Identification Remarks - One unopened flower when recorded and one leaf only. ID not confirmed" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Identification Remarks - new remarks" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Identification Remarks - no flowers present" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Preparations - Wet (in ethanol or some other preservative)" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Preparations - new preparations" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member , + ; + schema:name "Occurrence Collection - Sensitivity Category - Category 1" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Taxon Rank - new taxon rank" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Occurrence Collection - Taxon Rank - species" ; + tern:hasAttribute . 
+ + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "1"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "10"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "11"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "12"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "13"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "14"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "2"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "3"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "4"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "5"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "6"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "7"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "8"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "9"^^ ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "acceptedNameUsage-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template dateIdentified used as proxy" ; + time:inXSDDateTimeStamp "2019-09-27T12:34:00+08:00"^^xsd:dateTimeStamp ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa Hopper & A.P.Br." ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "acceptedNameUsage-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template dateIdentified used as proxy" ; + time:inXSDDateTimeStamp "2019-09-27T12:34:00+08:00"^^xsd:dateTimeStamp ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa Hopper & A.P.Br." ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "establishmentMeans-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "native" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "establishmentMeans-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "new establishment means" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "individualCount-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult 2 ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "individualCount-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult 6 ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "occurrenceStatus-observation" ; + time:hasTime [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "present" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "occurrenceStatus-observation" ; + time:hasTime [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "new occurrence status" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "organismRemarks-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Dried out leaf tips" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "organismRemarks-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Leaves brown" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Calothamnus lateralis var. crassus" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Boronia anceps" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Boronia anceps" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Boronia anceps" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Banksia sessilis var. 
cordata" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Banksia sessilis var. cordata" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Banksia sessilis var. cordata" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Banksia sessilis var. cordata" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "lifeStage-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "adult" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "lifeStage-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "new life stage" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "reproductiveCondition-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "No breeding evident" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "reproductiveCondition-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "new reproductiveCondition" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "sex-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "male" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "sex-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "new sex" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + time:inXSDDateTimeStamp "2019-09-27T12:34:00+08:00"^^xsd:dateTimeStamp ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia ?excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + time:inXSDDateTimeStamp "2019-09-27T12:34:00+08:00"^^xsd:dateTimeStamp ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia ?excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Calothamnus lateralis var. 
crassus" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia ?excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia ?excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Boronia anceps" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Boronia anceps" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Boronia anceps" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Banksia sessilis var. cordata" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Banksia sessilis var. cordata" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Banksia sessilis var. 
cordata" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Banksia sessilis var. cordata" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "verbatimID-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a rdfs:Datatype ; + skos:definition "A catalog number for the sample" ; + skos:prefLabel "BHP catalogNumber" ; + prov:wasAttributedTo . + + a rdfs:Datatype ; + skos:definition "A catalog number for the sample" ; + skos:prefLabel "WAM catalogNumber" ; + prov:wasAttributedTo . + + a rdfs:Datatype ; + skos:definition "An identifier for the record" ; + skos:prefLabel "Stream Environment and Water Pty Ltd recordID" ; + prov:qualifiedAttribution . + + a rdfs:Datatype ; + skos:definition "An identifier for the record" ; + skos:prefLabel "WAM recordID" ; + prov:qualifiedAttribution . + + a rdfs:Datatype ; + skos:definition "The record number of the original observation from the original observer of the organism" ; + skos:prefLabel "Stream Environment and Water Pty Ltd recordNumber" ; + prov:wasAttributedTo . + + a tern:SiteVisit ; + schema:isPartOf . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "HumanObservation" ; + tern:hasValue . 
+ + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "PreservedSpecimen" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "new basis of record" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "WA" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Coordinates generalised" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Coordinates rounded to the nearest 10 km for conservation concern" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Closed forest of Melaleuca lanceolata. White, grey or brown sand, sandy loam." ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "new habitat" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "?" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "new identification qualifier" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "species incerta" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Could not confirm the ID due to damaged flower" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "One unopened flower when recorded and one leaf only. ID not confirmed" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "new remarks" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "no flowers present" ; + tern:hasValue . 
+ + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Wet (in ethanol or some other preservative)" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "new preparations" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Category 1 - Department of Biodiversity and Conservation" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "new taxon rank" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "species" ; + tern:hasValue . + + a prov:Attribution ; + prov:agent ; + prov:hadRole . + + a prov:Attribution ; + prov:agent ; + prov:hadRole . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of basisOfRecord." ; + skos:inScheme ; + skos:prefLabel "new basis of record" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of identificationQualifier." ; + skos:inScheme ; + skos:prefLabel "?" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of identificationQualifier." ; + skos:inScheme ; + skos:prefLabel "new identification qualifier" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of preparations." ; + skos:inScheme ; + skos:prefLabel "Wet (in ethanol or some other preservative)" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of preparations." 
; + skos:inScheme ; + skos:prefLabel "new preparations" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of sensitivityCategory." ; + skos:inScheme ; + skos:prefLabel "Category 1" ; + skos:scopeNote "Under the authority of Department of Biodiversity and Conservation" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of targetHabitatScope" ; + skos:inScheme ; + skos:prefLabel "Closed forest of Melaleuca lanceolata. White, grey or brown sand, sandy loam." ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of targetHabitatScope" ; + skos:inScheme ; + skos:prefLabel "new habitat" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of taxonRank." ; + skos:inScheme ; + skos:prefLabel "new taxon rank" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:definition "The existence of the organism from this kingdom sampled at a particular place at a particular time." ; + skos:inScheme ; + skos:prefLabel "new kingdom occurrence" . + + a skos:Concept ; + skos:definition "A specimen sampled from an organism of this kingdom." ; + skos:inScheme ; + skos:prefLabel "new kingdom specimen" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of samplingProtocol." ; + skos:inScheme ; + skos:prefLabel "new sampling protocol" . 
+ + a skos:Concept ; + skos:broader ; + skos:definition "A type of sequencingMethod." ; + skos:inScheme ; + skos:prefLabel "Sanger dideoxy sequencing" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of sequencingMethod." ; + skos:inScheme ; + skos:prefLabel "new sequencing method" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of threatStatusCheckProtocol." ; + skos:inScheme ; + skos:prefLabel "Check against Threatened and Priority Fauna List WA available from https://www.dpaw.wa.gov.au/plants-and-animals/threatened-species-and-communities/threatened-animals. Last updated 13 June 2022" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of threatStatusCheckProtocol." ; + skos:inScheme ; + skos:prefLabel "a random selection" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of establishmentMeans." ; + skos:inScheme ; + skos:prefLabel "new establishment means" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of lifeStage." ; + skos:inScheme ; + skos:prefLabel "new life stage" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of occurrenceStatus." ; + skos:inScheme ; + skos:prefLabel "new occurrence status" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . 
+ + a skos:Concept ; + skos:broader ; + skos:definition "A type of reproductiveCondition." ; + skos:inScheme ; + skos:prefLabel "No breeding evident" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of reproductiveCondition." ; + skos:inScheme ; + skos:prefLabel "new reproductiveCondition" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of sex." ; + skos:inScheme ; + skos:prefLabel "new sex" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of threatStatus." ; + skos:inScheme ; + skos:prefLabel "WA/new threat status" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "8022FSJMJ079c5cf"^^ ; + schema:isPartOf . + + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "ABC123"^^ ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "threatStatus-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template dateIdentified used as proxy" ; + time:inXSDDateTimeStamp "2019-09-27T12:34:00+08:00"^^xsd:dateTimeStamp ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "VU" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "threatStatus-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template dateIdentified used as proxy" ; + time:inXSDDateTimeStamp "2019-09-27T12:34:00+08:00"^^xsd:dateTimeStamp ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "new threat status" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:IRI, + tern:Value ; + rdfs:label "establishmentMeans-value" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "establishmentMeans-value" ; + rdf:value . + + a tern:Integer, + tern:Value ; + rdfs:label "individual-count" ; + rdf:value 2 . + + a tern:Integer, + tern:Value ; + rdfs:label "individual-count" ; + rdf:value 6 . + + a tern:IRI, + tern:Value ; + rdfs:label "occurrenceStatus = new occurrence status" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "occurrenceStatus = present" ; + rdf:value . + + a tern:Text, + tern:Value ; + rdfs:label "organism-remarks" ; + rdf:value "Dried out leaf tips" . + + a tern:Text, + tern:Value ; + rdfs:label "organism-remarks" ; + rdf:value "Leaves brown" . + + a tern:FeatureOfInterest, + tern:Text, + tern:Value ; + rdfs:label "scientificName" ; + rdf:value "Calothamnus lateralis var. crassus" ; + schema:isPartOf ; + tern:featureType . + + a tern:IRI, + tern:Value ; + rdfs:label "lifeStage-value" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "lifeStage-value" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "reproductiveCondition-value" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "reproductiveCondition-value" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "sex-value" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "sex-value" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "Conservation status = VU" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "Conservation status = new threat status" ; + rdf:value . 
+ + a tern:Text, + tern:Value ; + rdf:value "Calothamnus lateralis var. crassus" . + + a tern:FeatureOfInterest, + tern:Sample ; + rdfs:comment "sequence-sample" ; + sosa:isResultOf ; + sosa:isSampleOf ; + schema:identifier "https://www.ncbi.nlm.nih.gov/nuccore/MH040616.1", + "https://www.ncbi.nlm.nih.gov/nuccore/MH040669.1" ; + schema:isPartOf ; + tern:featureType . + + a tern:FeatureOfInterest, + tern:Sample ; + rdfs:comment "sequence-sample" ; + sosa:isResultOf ; + sosa:isSampleOf ; + schema:identifier "https://www.ncbi.nlm.nih.gov/nuccore/MH040616.1", + "https://www.ncbi.nlm.nih.gov/nuccore/MH040669.1" ; + schema:isPartOf ; + tern:featureType . + + a tern:IRI, + tern:Value ; + rdfs:label "HumanObservation" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "PreservedSpecimen" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "new basis of record" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "WA" ; + rdf:value . + + a tern:Text, + tern:Value ; + rdf:value "Coordinates generalised" . + + a tern:Text, + tern:Value ; + rdf:value "Coordinates rounded to the nearest 10 km for conservation concern" . + + a tern:IRI, + tern:Value ; + rdfs:label "Closed forest of Melaleuca lanceolata. White, grey or brown sand, sandy loam." ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "new habitat" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "?" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "new identification qualifier" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "species incerta" ; + rdf:value . + + a tern:Text, + tern:Value ; + rdf:value "Could not confirm the ID due to damaged flower" . + + a tern:Text, + tern:Value ; + rdf:value "One unopened flower when recorded and one leaf only. ID not confirmed" . + + a tern:Text, + tern:Value ; + rdf:value "new remarks" . + + a tern:Text, + tern:Value ; + rdf:value "no flowers present" . 
+ + a tern:IRI, + tern:Value ; + rdfs:label "Wet (in ethanol or some other preservative)" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "new preparations" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "sensitivity category = Category 1" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "new taxon rank" ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdfs:label "species" ; + rdf:value . + + a prov:Agent ; + schema:name "BHP" . + + a prov:Agent ; + schema:name "Test Pty Ltd" . + + a prov:Agent ; + schema:name "WA-BIO" . + + a tern:FeatureOfInterest, + tern:Site ; + schema:isPartOf ; + tern:featureType . + + a tern:Survey ; + schema:isPartOf . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of identificationMethod." ; + skos:inScheme ; + skos:prefLabel "Visually identified in the field (sighting)" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of identificationMethod." ; + skos:inScheme ; + skos:prefLabel "new identification method" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:FeatureOfInterest, + tern:Text, + tern:Value ; + rdfs:label "acceptedNameUsage-value" ; + rdf:value "Caladenia excelsa Hopper & A.P.Br." ; + schema:isPartOf ; + tern:featureType . + + a tern:Text, + tern:Value ; + rdf:value "Caladenia ?excelsa" . + + a tern:FeatureOfInterest, + tern:Sample ; + rdfs:comment "specimen-sample" ; + sosa:isResultOf ; + sosa:isSampleOf ; + schema:isPartOf ; + tern:featureType . + + a tern:Sampling ; + geo:hasGeometry _:Nbaf38917362852ca9a9a0d2500000001 ; + geo:hasMetricSpatialAccuracy 5e+01 ; + rdfs:comment "sequencing-sampling" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Sampling ; + geo:hasGeometry _:Nbaf38917362852ca9a9a0d2500000003 ; + geo:hasMetricSpatialAccuracy 3e+01 ; + rdfs:comment "sequencing-sampling" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Sampling ; + geo:hasGeometry _:Nfcd8f1042640aeab2a686b1700000001 ; + rdfs:comment "specimen-sampling" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:usedProcedure ; + schema:isPartOf . 
+ + a tern:Sampling ; + geo:hasGeometry _:Nfcd8f1042640aeab2a686b1700000003 ; + geo:hasMetricSpatialAccuracy 5e+01 ; + rdfs:comment "specimen-sampling" ; + time:hasTime [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Sampling ; + geo:hasGeometry _:Nfcd8f1042640aeab2a686b1700000005 ; + geo:hasMetricSpatialAccuracy 3e+01 ; + rdfs:comment "specimen-sampling" ; + time:hasTime [ a time:Instant ; + time:inXSDDate "2019-09-27"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:usedProcedure ; + schema:isPartOf . + + a prov:Agent ; + schema:name "WAM" . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + time:inXSDDateTimeStamp "2019-09-27T12:34:00+08:00"^^xsd:dateTimeStamp ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + time:inXSDDateTimeStamp "2019-09-27T12:34:00+08:00"^^xsd:dateTimeStamp ] ; + prov:wasAssociatedWith ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Caladenia excelsa" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000021 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType . + + a tern:FeatureOfInterest, + tern:Text, + tern:Value ; + rdfs:label "scientificName" ; + rdf:value "Boronia anceps" ; + schema:isPartOf ; + tern:featureType . + + a tern:Text, + tern:Value ; + rdf:value "Boronia anceps" . 
+ + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000000 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc57550000001b ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc57550000001e ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + sosa:usedProcedure ; + schema:identifier "PE:12:8831" ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000003 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000006 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . 
+ + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000009 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc57550000000c ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc57550000000f ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000012 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000015 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . 
+ + a dwc:Occurrence, + tern:FeatureOfInterest ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000018 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a tern:FeatureOfInterest, + tern:Text, + tern:Value ; + rdfs:label "scientificName" ; + rdf:value "Banksia sessilis var. cordata" ; + schema:isPartOf ; + tern:featureType . + + a tern:Text, + tern:Value ; + rdf:value "Banksia sessilis var. cordata" . + + a tern:Text, + tern:Value ; + rdf:value "Caladenia ?excelsa", + "Caladenia excelsa" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + dwc:catalogNumber "CC123"^^ ; + dwc:collectionCode "C01" ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000024 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + dwc:catalogNumber "CC456"^^ ; + dwc:collectionCode "C01" ; + prov:wasAssociatedWith ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc575500000027 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a tern:FeatureOfInterest, + tern:Sample ; + rdfs:comment "specimen-sample" ; + sosa:isResultOf ; + sosa:isSampleOf ; + schema:isPartOf ; + tern:featureType . + + a tern:FeatureOfInterest, + tern:Sample ; + rdfs:comment "specimen-sample" ; + sosa:isResultOf ; + sosa:isSampleOf ; + schema:isPartOf ; + tern:featureType . 
+ + a tern:FeatureOfInterest, + tern:Text, + tern:Value ; + rdfs:label "scientificName" ; + rdf:value "Caladenia excelsa" ; + schema:isPartOf ; + tern:featureType . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + dwc:catalogNumber "ARACH"^^ ; + dwc:collectionCode "32237" ; + prov:wasAssociatedWith ; + sosa:isSampleOf ; + sosa:usedProcedure ; + schema:identifier "BHP2012-7521"^^, + "M12378"^^, + "MR-456"^^, + "PE:12:8832"^^ ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc57550000002a ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:hasSiteVisit ; + tern:locationDescription "Cowaramup Bay Road" . + + a dwc:Occurrence, + tern:FeatureOfInterest ; + dwc:catalogNumber "ARACH"^^ ; + dwc:collectionCode "32238" ; + prov:wasAssociatedWith ; + sosa:isSampleOf ; + sosa:usedProcedure ; + schema:identifier "BHP2012-7522"^^, + "M12379"^^, + "MR-457"^^, + "PE:12:8833"^^ ; + schema:isPartOf , + ; + schema:spatial _:N7bfc9936b099cf9353fc57550000002d ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-26"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a tern:Survey ; + schema:isPartOf . + + a prov:Agent ; + schema:name "Stream Environment and Water Pty Ltd" . + + a tern:Dataset . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ] ; + rdf:object _:Nbaf38917362852ca9a9a0d2500000001 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ] ; + rdf:object _:Nbaf38917362852ca9a9a0d2500000003 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . 
+ +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000000 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.01)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000003 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.01)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000006 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.01)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000009 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 114.99)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc57550000000c ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 114.99)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc57550000000f ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 114.99)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000012 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 114.99)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000015 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . 
+ +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000018 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc57550000001b ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc57550000001e ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000021 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000024 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 2e+01 ] ; + rdf:object _:N7bfc9936b099cf9353fc575500000027 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 ] ; + rdf:object _:N7bfc9936b099cf9353fc57550000002a ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . 
+ +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 3e+01 ] ; + rdf:object _:N7bfc9936b099cf9353fc57550000002d ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral ] ; + rdf:object _:Nfcd8f1042640aeab2a686b1700000001 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ] ; + rdf:object _:Nfcd8f1042640aeab2a686b1700000003 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ] ; + rdf:object _:Nfcd8f1042640aeab2a686b1700000005 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +_:N7bfc9936b099cf9353fc575500000000 a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc575500000003 a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.01)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc575500000006 a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.01)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc575500000009 a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.01)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc57550000000c a geo:Geometry ; + geo:asWKT " POINT (-33.86 114.99)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc57550000000f a geo:Geometry ; + geo:asWKT " POINT (-33.86 114.99)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc575500000012 a geo:Geometry ; + geo:asWKT " POINT (-33.86 114.99)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc575500000015 a geo:Geometry ; + geo:asWKT " POINT (-33.86 114.99)"^^geo:wktLiteral . 
+ +_:N7bfc9936b099cf9353fc575500000018 a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc57550000001b a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc57550000001e a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc575500000021 a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc575500000024 a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral . + +_:N7bfc9936b099cf9353fc575500000027 a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 2e+01 . + +_:N7bfc9936b099cf9353fc57550000002a a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 . + +_:N7bfc9936b099cf9353fc57550000002d a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 3e+01 . + +_:Nbaf38917362852ca9a9a0d2500000001 a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ; + rdfs:comment "Location unknown, location of field sampling used as proxy" . + +_:Nbaf38917362852ca9a9a0d2500000003 a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ; + rdfs:comment "Location unknown, location of field sampling used as proxy" . + +_:Nfcd8f1042640aeab2a686b1700000001 a geo:Geometry ; + geo:asWKT " POINT (-33.86 115.02)"^^geo:wktLiteral ; + rdfs:comment "Location unknown, location of field sampling used as proxy" . + +_:Nfcd8f1042640aeab2a686b1700000003 a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ; + rdfs:comment "Location unknown, location of field sampling used as proxy" . + +_:Nfcd8f1042640aeab2a686b1700000005 a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ; + rdfs:comment "Location unknown, location of field sampling used as proxy" . 
+ diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.csv b/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.csv new file mode 100644 index 00000000..a14070f7 --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.csv @@ -0,0 +1,2 @@ +providerRecordID,providerRecordIDSource,locality,decimalLatitude,decimalLongitude,geodeticDatum,coordinateUncertaintyInMeters,dataGeneralizations,eventDateStart,eventDateEnd,samplingProtocol,basisOfRecord,recordedBy,recordNumber,occurrenceStatus,habitat,establishmentMeans,organismRemarks,individualCount,organismQuantity,organismQuantityType,lifeStage,sex,reproductiveCondition,ownerRecordID,ownerRecordIDSource,collectionCode,catalogNumber,catalogNumberSource,otherCatalogNumbers,otherCatalogNumbersSource,preparations,preparedDate,associatedSequences,sequencingMethod,verbatimIdentification,dateIdentified,identifiedBy,identificationMethod,scientificName,identificationQualifier,identificationRemarks,acceptedNameUsage,kingdom,taxonRank,threatStatus,conservationAuthority,threatStatusCheckProtocol,threatStatusDateDetermined,threatStatusDeterminedBy,sensitivityCategory,sensitivityAuthority,surveyID,siteID,siteVisitID +A0010,Gaia Resources,Cowaramup Bay Road,-33.8,115.21,WGS84,,,24/09/2019,,,,,,,,,,,0.05,percentageCoverage,,,,,,,,,,,,,,,,,,,Calothamnus lateralis var. crassus,,,,Plantae,,,,,,,,,,P1, diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl b/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl new file mode 100644 index 00000000..986d700c --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl @@ -0,0 +1,103 @@ +@prefix abis: . +@prefix dwc: . +@prefix geo: . +@prefix prov: . +@prefix rdf: . +@prefix rdfs: . +@prefix schema: . +@prefix skos: . +@prefix sosa: . +@prefix tern: . +@prefix time: . +@prefix xsd: . 
+ + a abis:BiodiversityRecord ; + schema:about ; + schema:identifier "A0010"^^ ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "Observation method unknown, 'human observation' used as proxy", + "organismQuantity-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-24"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "0.05 percentageCoverage" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a tern:Observation ; + rdfs:comment "scientificName-observation" ; + time:hasTime [ a time:Instant ; + rdfs:comment "Date unknown, template eventDateStart used as proxy" ; + time:inXSDDate "2019-09-24"^^xsd:date ] ; + sosa:hasFeatureOfInterest ; + sosa:hasResult ; + sosa:hasSimpleResult "Calothamnus lateralis var. crassus" ; + sosa:observedProperty ; + sosa:usedProcedure ; + schema:isPartOf . + + a rdfs:Datatype ; + skos:definition "An identifier for the record" ; + skos:prefLabel "Gaia Resources recordID" ; + prov:qualifiedAttribution . + + a tern:FeatureOfInterest, + tern:Site ; + schema:isPartOf ; + tern:featureType . + + a tern:Survey ; + schema:isPartOf . + + a prov:Attribution ; + prov:agent ; + prov:hadRole . + + a tern:Float, + tern:Value ; + rdfs:label "organism-quantity" ; + rdf:value "0.05"^^xsd:float ; + tern:unit . + + a tern:FeatureOfInterest, + tern:Text, + tern:Value ; + rdfs:label "scientificName" ; + rdf:value "Calothamnus lateralis var. crassus" ; + schema:isPartOf ; + tern:featureType . + + a prov:Agent ; + schema:name "Gaia Resources" . 
+ + a dwc:Occurrence, + tern:FeatureOfInterest ; + sosa:isSampleOf ; + sosa:usedProcedure ; + schema:isPartOf , + ; + schema:spatial _:Nb0c3d4fa822b88b4d3f8743700000000 ; + schema:temporal [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2019-09-24"^^xsd:date ] ] ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a tern:Dataset . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral ] ; + rdf:object _:Nb0c3d4fa822b88b4d3f8743700000000 ; + rdf:predicate schema:spatial ; + rdf:subject ; + rdfs:comment "supplied as" . + +_:Nb0c3d4fa822b88b4d3f8743700000000 a geo:Geometry ; + geo:asWKT " POINT (-33.8 115.21)"^^geo:wktLiteral . + diff --git a/abis_mapping/templates/survey_occurrence_data_v3/mapping.py b/abis_mapping/templates/survey_occurrence_data_v3/mapping.py new file mode 100644 index 00000000..deb72a03 --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/mapping.py @@ -0,0 +1,4532 @@ +"""Provides ABIS Mapper for `survey_occurrence_data.csv` Template v3""" + +# Third-Party +import frictionless +import rdflib +import rdflib.term + +# Local +from abis_mapping import base +from abis_mapping import utils +from abis_mapping import plugins +from abis_mapping import models +from abis_mapping import vocabs + +# Typing +from typing import Any + + +# Constants and Shortcuts +# These constants and shortcuts are specific to this template, and as such are defined here +# rather than in a common `utils` module. 
+a = rdflib.RDF.type + +CONCEPT_AUSTRALIA = rdflib.URIRef("https://sws.geonames.org/2077456/") +CONCEPT_TAXON = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/70646576-6dc7-4bc5-a9d8-c4c366850df0") +CONCEPT_SITE = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/5bf7ae21-a454-440b-bdd7-f2fe982d8de4") +CONCEPT_ID_UNCERTAINTY = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/54e40f12-8c13-495a-9f8d-838d78faa5a7") +CONCEPT_ID_REMARKS = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/45a86abc-43c7-4a30-ac73-fc8d62538140") +CONCEPT_PROCEDURE_SAMPLING = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/7930424c-f2e1-41fa-9128-61524b67dbd5") +CONCEPT_SCIENTIFIC_NAME = utils.rdf.uri("concept/scientificName", utils.namespaces.EXAMPLE) # TODO -> Need real URI +CONCEPT_DATA_GENERALIZATIONS = utils.rdf.uri( + "concept/data-generalizations", utils.namespaces.EXAMPLE +) # TODO -> Need real URI +CONCEPT_TAXON_RANK = utils.rdf.uri("concept/taxonRank", utils.namespaces.EXAMPLE) # TODO -> Need real URI +CONCEPT_INDIVIDUAL_COUNT = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/74c71500-0bae-43c9-8db0-bd6940899af1") +CONCEPT_ORGANISM_REMARKS = utils.rdf.uri("concept/organismRemarks", utils.namespaces.EXAMPLE) # TODO -> Need real URI +CONCEPT_HABITAT = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2090cfd9-8b6b-497b-9512-497456a18b99") +CONCEPT_BASIS_OF_RECORD = utils.rdf.uri("concept/basisOfRecord", utils.namespaces.EXAMPLE) # TODO -> Need real URI +CONCEPT_OCCURRENCE_STATUS = utils.rdf.uri("concept/occurrenceStatus", utils.namespaces.EXAMPLE) # TODO -> Need real URI +CONCEPT_PREPARATIONS = utils.rdf.uri("concept/preparations", utils.namespaces.EXAMPLE) # TODO -> Need real URI +CONCEPT_ESTABLISHMENT_MEANS = utils.rdf.uri( + "concept/establishmentMeans", utils.namespaces.EXAMPLE +) # TODO -> Need real URI +CONCEPT_LIFE_STAGE = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/abb0ee19-b2e8-42f3-8a25-d1f39ca3ebc3") +CONCEPT_SEX = 
rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/05cbf534-c233-4aa8-a08c-00b28976ed36") +CONCEPT_REPRODUCTIVE_CONDITION = utils.rdf.uri( + "concept/reproductiveCondition", utils.namespaces.EXAMPLE +) # TODO -> Need real URI +CONCEPT_ACCEPTED_NAME_USAGE = utils.rdf.uri( + "concept/acceptedNameUsage", utils.namespaces.EXAMPLE +) # TODO -> Need real URI +CONCEPT_NAME_CHECK_METHOD = utils.rdf.uri( + "methods/name-check-method", utils.namespaces.EXAMPLE +) # TODO -> Need real URI +CONCEPT_SEQUENCE = utils.rdf.uri("concept/sequence", utils.namespaces.EXAMPLE) # TODO -> Need real URI +CONCEPT_CONSERVATION_STATUS = rdflib.URIRef( + "http://linked.data.gov.au/def/tern-cv/1466cc29-350d-4a23-858b-3da653fd24a6" +) +CONCEPT_CONSERVATION_AUTHORITY = rdflib.URIRef( + "http://linked.data.gov.au/def/tern-cv/755b1456-b76f-4d54-8690-10e41e25c5a7" +) +CONCEPT_SENSITIVITY_CATEGORY = utils.rdf.uri( + "concept/sensitiveCategory", utils.namespaces.EXAMPLE +) # TODO Need real URI +CONCEPT_ORGANISM_QUANTITY = utils.rdf.uri("concept/organismQuantity", utils.namespaces.EXAMPLE) # TODO Need real URI + +# Roles +DATA_ROLE_RESOURCE_PROVIDER = rdflib.URIRef("https://linked.data.gov.au/def/data-roles/resourceProvider") +DATA_ROLE_OWNER = rdflib.URIRef("https://linked.data.gov.au/def/data-roles/owner") + + +class SurveyOccurrenceMapper(base.mapper.ABISMapper): + """ABIS Mapper for `survey_occurrence_data.csv` v3""" + + def apply_validation(self, data: base.types.ReadableType, **kwargs: Any) -> frictionless.Report: + """Applies Frictionless Validation for the `survey_occurrence_data.csv` Template + + Args: + data (base.types.ReadableType): Raw data to be validated. + **kwargs (Any): Additional keyword arguments. + + Keyword Args: + site_id_geometry_map (dict[str, str]): Default values to use for geometry + for given siteID. + site_visit_id_temporal_map (dict[str, str]): Default RDF (serialized as turtle) + to use for temporal entity for given siteVisitID. 
+ site_visit_id_site_id_map (dict[str, str]): Valid site ID for a given site visit ID. + + Returns: + frictionless.Report: Validation report for the specified data. + """ + # Extract kwargs + site_id_geometry_map = kwargs.get("site_id_geometry_map") + site_visit_id_temporal_map = kwargs.get("site_visit_id_temporal_map") + site_visit_id_site_id_map = kwargs.get("site_visit_id_site_id_map") + + # Construct Schema + schema = self.extra_fields_schema( + data=data, + full_schema=True, + ) + + # Construct default Checklist + checklist = frictionless.Checklist( + checks=[ + # Extra Custom Checks + plugins.tabular.IsTabular(), + plugins.empty.NotEmpty(), + plugins.chronological.ChronologicalOrder( + field_names=["eventDateStart", "eventDateEnd"], + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["threatStatus", "conservationAuthority"], + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["organismQuantity", "organismQuantityType"], + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["catalogNumber", "catalogNumberSource"], + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["otherCatalogNumbers", "otherCatalogNumbersSource"], + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["ownerRecordID", "ownerRecordIDSource"], + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["sensitivityCategory", "sensitivityAuthority"], + ), + plugins.chained_inclusion.ChainedInclusion( + field_names=["siteVisitID", "siteID"], + ), + ], + ) + + # Modify checklist in the event site visit id to site id map provided + if site_visit_id_site_id_map is not None: + # Add lookup match check + checklist.add_check( + plugins.lookup_match.VLookupMatch( + key_field="siteVisitID", + value_field="siteID", + lu_map=site_visit_id_site_id_map, + ) + ) + + # Modify schema and checklist in the event default temporal map provided + if site_visit_id_temporal_map is not None: + # Need to make sure that required is false for the 
eventDateStart field + # since this would override the default lookup check. + schema.get_field("eventDateStart").constraints["required"] = False + + # Perform a default lookup check based on passed in map. + checklist.add_check( + plugins.default_lookup.DefaultLookup( + key_field="siteVisitID", + value_field="eventDateStart", + default_map=site_visit_id_temporal_map, + ) + ) + + # Modify schema and checklist in the event default geometry map provided + if site_id_geometry_map is not None: + # We need to make sure that required is false from the lat long fields + # since this would override the default lookup checks + for field_name in ["decimalLatitude", "decimalLongitude", "geodeticDatum"]: + schema.get_field(field_name).constraints["required"] = False + + # Perform a default lookup check based on passed in map. + checklist.add_check( + plugins.default_lookup.DefaultLookup( + key_field="siteID", + value_field="decimalLatitude", + default_map=site_id_geometry_map, + ) + ) + # Mutual inclusion check to close out the possibility of one missing. + checklist.add_check( + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["decimalLatitude", "decimalLongitude", "geodeticDatum"] + ) + ) + + # Construct Resource (Table with Schema) + resource = frictionless.Resource( + source=data, + format="csv", # TODO -> Hardcoded to csv for now + schema=schema, + encoding="utf-8", + ) + + # Validate + report: frictionless.Report = resource.validate(checklist=checklist) + + # Return Validation Report + return report + + def extract_site_id_keys( + self, + data: base.types.ReadableType, + ) -> dict[str, bool]: + """Extract site id key values from the data. + + Args: + data (base.types.ReadableType): Raw data to be mapped. 
+ + Returns: + dict[str, bool]: Keys are the site id values encountered + in the data, values are all 'True', + """ + # Construct schema + schema = frictionless.Schema.from_descriptor(self.schema()) + + # Construct resource + resource = frictionless.Resource( + source=data, + format="csv", + schema=schema, + encoding="utf-8", + ) + + # Iterate over rows to extract values + with resource.open() as r: + # Construct dictionary and return + return {row["siteID"]: True for row in r.row_stream if row["siteID"] is not None} + + def extract_site_visit_id_keys( + self, + data: base.types.ReadableType, + ) -> dict[str, bool]: + """Extract site visit id key values from the data. + + Args: + data (base.types.ReadableType): Raw data to be mapped. + + Returns: + dict[str, bool]: Keys are the site visit id values encountered + in the data, values are all 'True', + """ + # Construct schema + schema = frictionless.Schema.from_descriptor(self.schema()) + + # Construct resource + resource = frictionless.Resource( + source=data, + format="csv", + schema=schema, + encoding="utf-8", + ) + + # Iterate over rows to extract values + with resource.open() as r: + # Construct dictionary and return + return {row["siteVisitID"]: True for row in r.row_stream if row["siteVisitID"]} + + def apply_mapping_row( + self, + *, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + extra_schema: frictionless.Schema, + base_iri: rdflib.Namespace, + **kwargs: Any, + ) -> None: + """Applies Mapping for a Row in the `survey_occurrence_data.csv` Template + + Args: + row (frictionless.Row): Row to be processed in the dataset. + dataset (rdflib.URIRef): Dataset uri this row is a part of. + graph (rdflib.Graph): Graph to map row into. + extra_schema (frictionless.Schema): Schema of extra fields. + base_iri (rdflib.Namespace): Optional base IRI namespace to use for mapping. + + Keyword Args: + site_id_geometry_map (dict[str, str] | None): Optional site id to geometry + default map. 
+ site_visit_id_temporal_map (dict[str, str] | None): Optional site visit id + to temporal entity rdf default map. + + Returns: + rdflib.Graph: Graph with row mapped into it. + """ + site_id_geometry_map = kwargs.get("site_id_geometry_map") + site_visit_id_temporal_map = kwargs.get("site_visit_id_temporal_map") + + # Get values from row + provider_record_id: str = row["providerRecordID"] + provider_record_id_source: str = row["providerRecordIDSource"] + + # Create URIs + provider_identified = utils.iri_patterns.agent_iri(row["identifiedBy"]) + sample_specimen = utils.iri_patterns.sample_iri(base_iri, "specimen", provider_record_id) + sampling_specimen = utils.iri_patterns.sampling_iri(base_iri, "specimen", provider_record_id) + sample_sequence = utils.iri_patterns.sample_iri(base_iri, "sequence", provider_record_id) + sampling_sequencing = utils.iri_patterns.sampling_iri(base_iri, "sequencing", provider_record_id) + provider_determined_by = utils.iri_patterns.agent_iri(row["threatStatusDeterminedBy"]) + + provider_record_id_datatype = utils.iri_patterns.datatype_iri("recordID", provider_record_id_source) + provider_record_id_agent = utils.iri_patterns.agent_iri(provider_record_id_source) + provider_record_id_attribution = utils.iri_patterns.attribution_iri( + base_iri, "resourceProvider", provider_record_id_source + ) + provider_record_id_occurrence = utils.rdf.uri(f"occurrence/{provider_record_id}", base_iri) + provider_record_id_biodiversity_record = utils.rdf.uri(f"biodiversityRecord/{provider_record_id}", base_iri) + + # Create URIs for Observations and Observation Values + observation_scientific_name = utils.iri_patterns.observation_iri(base_iri, "scientificName", provider_record_id) + text_scientific_name = utils.iri_patterns.observation_value_iri( + base_iri, "scientificName", row["scientificName"] + ) + individual_count_observation = utils.iri_patterns.observation_iri( + base_iri, "individualCount", provider_record_id + ) + individual_count_value = 
utils.iri_patterns.observation_value_iri( + base_iri, "individualCount", row["individualCount"] + ) + organism_remarks_observation = utils.iri_patterns.observation_iri( + base_iri, "organismRemarks", provider_record_id + ) + organism_remarks_value = utils.iri_patterns.observation_value_iri( + base_iri, "organismRemarks", row["organismRemarks"] + ) + organism_quantity_observation = utils.iri_patterns.observation_iri( + base_iri, "organismQuantity", provider_record_id + ) + organism_quantity_value = utils.iri_patterns.observation_value_iri( + base_iri, "organismQuantity", row["organismQuantity"] + ) + occurrence_status_observation = utils.iri_patterns.observation_iri( + base_iri, "occurrenceStatus", provider_record_id + ) + occurrence_status_value = utils.iri_patterns.observation_value_iri( + base_iri, "occurrenceStatus", row["occurrenceStatus"] + ) + establishment_means_observation = utils.iri_patterns.observation_iri( + base_iri, "establishmentMeans", provider_record_id + ) + establishment_means_value = utils.iri_patterns.observation_value_iri( + base_iri, "establishmentMeans", row["establishmentMeans"] + ) + accepted_name_usage_observation = utils.iri_patterns.observation_iri( + base_iri, "acceptedNameUsage", provider_record_id + ) + accepted_name_usage_value = utils.iri_patterns.observation_value_iri( + base_iri, "acceptedNameUsage", row["acceptedNameUsage"] + ) + threat_status_observation = utils.iri_patterns.observation_iri(base_iri, "threatStatus", provider_record_id) + threat_status_value = utils.iri_patterns.observation_value_iri(base_iri, "threatStatus", row["threatStatus"]) + + # Create URIs for Observations+Values that depend on if there is a specimen. 
+ if has_specimen(row): + _specimen_dependant_observation_iri = utils.iri_patterns.specimen_observation_iri + _specimen_dependant_observation_value_iri = utils.iri_patterns.specimen_observation_value_iri + else: + _specimen_dependant_observation_iri = utils.iri_patterns.observation_iri + _specimen_dependant_observation_value_iri = utils.iri_patterns.observation_value_iri + observation_verbatim_id = _specimen_dependant_observation_iri( + base_iri, "verbatimIdentification", provider_record_id + ) + text_verbatim_id = _specimen_dependant_observation_value_iri( + base_iri, "verbatimIdentification", row["verbatimIdentification"] + ) + life_stage_observation = _specimen_dependant_observation_iri(base_iri, "lifeStage", provider_record_id) + life_stage_value = _specimen_dependant_observation_value_iri(base_iri, "lifeStage", row["lifeStage"]) + sex_observation = _specimen_dependant_observation_iri(base_iri, "sex", provider_record_id) + sex_value = _specimen_dependant_observation_value_iri(base_iri, "sex", row["sex"]) + reproductive_condition_observation = _specimen_dependant_observation_iri( + base_iri, "reproductiveCondition", provider_record_id + ) + reproductive_condition_value = _specimen_dependant_observation_value_iri( + base_iri, "reproductiveCondition", row["reproductiveCondition"] + ) + + # Conditionally create uris dependant of dataGeneralizations field + if data_generalizations := row["dataGeneralizations"]: + data_generalizations_attribute = utils.iri_patterns.attribute_iri( + base_iri, "dataGeneralizations", data_generalizations + ) + data_generalizations_value = utils.iri_patterns.attribute_value_iri( + base_iri, "dataGeneralizations", data_generalizations + ) + data_generalizations_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Occurrence", "dataGeneralizations", data_generalizations + ) + else: + data_generalizations_attribute = None + data_generalizations_value = None + data_generalizations_collection = None + + # Conditionally 
create uris dependent on basisOfRecord field
+ if catalog_number_source := row["catalogNumberSource"]: + catalog_number_datatype = utils.iri_patterns.datatype_iri("catalogNumber", catalog_number_source) + catalog_number_provider = utils.iri_patterns.agent_iri(catalog_number_source) + else: + catalog_number_datatype = None + catalog_number_provider = None + + # Conditionally create uris dependent on otherCatalogNumbersSource field. + if other_catalog_numbers_source := row["otherCatalogNumbersSource"]: + other_catalog_numbers_datatype = utils.iri_patterns.datatype_iri( + "catalogNumber", other_catalog_numbers_source + ) + other_catalog_numbers_provider = utils.iri_patterns.agent_iri(other_catalog_numbers_source) + else: + other_catalog_numbers_datatype = None + other_catalog_numbers_provider = None + + # Conditionally create uris dependent on preparations field + if preparations := row["preparations"]: + preparations_attribute = utils.iri_patterns.attribute_iri(base_iri, "preparations", preparations) + preparations_value = utils.iri_patterns.attribute_value_iri(base_iri, "preparations", preparations) + preparations_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Occurrence", "preparations", preparations + ) + else: + preparations_attribute = None + preparations_value = None + preparations_collection = None + + # Conditionally create IRIs for the identificationQualifier field + id_qualifier: str | None = row["identificationQualifier"] + if id_qualifier: + id_qualifier_attribute = utils.iri_patterns.attribute_iri(base_iri, "identificationQualifier", id_qualifier) + id_qualifier_value = utils.iri_patterns.attribute_value_iri( + base_iri, "identificationQualifier", id_qualifier + ) + id_qualifier_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Occurrence", "identificationQualifier", id_qualifier + ) + else: + id_qualifier_attribute = None + id_qualifier_value = None + id_qualifier_collection = None + + # Conditionally create IRIs for the identificationRemarks field + 
id_remarks: str | None = row["identificationRemarks"] + if id_remarks: + id_remarks_attribute = utils.iri_patterns.attribute_iri(base_iri, "identificationRemarks", id_remarks) + id_remarks_value = utils.iri_patterns.attribute_value_iri(base_iri, "identificationRemarks", id_remarks) + id_remarks_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Occurrence", "identificationRemarks", id_remarks + ) + else: + id_remarks_attribute = None + id_remarks_value = None + id_remarks_collection = None + + # Conditionally create IRIs for the taxonRank field + taxon_rank: str | None = row["taxonRank"] + if taxon_rank: + taxon_rank_attribute = utils.iri_patterns.attribute_iri(base_iri, "taxonRank", taxon_rank) + taxon_rank_value = utils.iri_patterns.attribute_value_iri(base_iri, "taxonRank", taxon_rank) + taxon_rank_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Occurrence", "taxonRank", taxon_rank + ) + else: + taxon_rank_attribute = None + taxon_rank_value = None + taxon_rank_collection = None + + # Conditionally create IRIs for the conservationAuthority field + conservation_authority: str | None = row["conservationAuthority"] + if conservation_authority: + conservation_authority_attribute = utils.iri_patterns.attribute_iri( + base_iri, "conservationAuthority", conservation_authority + ) + conservation_authority_value = utils.iri_patterns.attribute_value_iri( + base_iri, "conservationAuthority", conservation_authority + ) + conservation_authority_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Occurrence", "conservationAuthority", conservation_authority + ) + else: + conservation_authority_attribute = None + conservation_authority_value = None + conservation_authority_collection = None + + # Conditionally create IRIs for the sensitivityCategory field + sensitivity_category: str | None = row["sensitivityCategory"] + if sensitivity_category: + sensitivity_category_attribute = utils.iri_patterns.attribute_iri( + 
base_iri, "sensitivityCategory", sensitivity_category + ) + sensitivity_category_value = utils.iri_patterns.attribute_value_iri( + base_iri, "sensitivityCategory", sensitivity_category + ) + sensitivity_category_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Occurrence", "sensitivityCategory", sensitivity_category + ) + else: + sensitivity_category_attribute = None + sensitivity_category_value = None + sensitivity_category_collection = None + + # Create URIs for Survey-related fields (i.e. fields not on the incidental template) + + # Create TERN survey IRI from surveyID field + survey_id: str | None = row["surveyID"] + survey = utils.iri_patterns.survey_iri(base_iri, survey_id) + + # Create Tern Site IRI, depending on the siteID field + site_id: str | None = row["siteID"] + if site_id: + site = utils.iri_patterns.site_iri(base_iri, site_id) + else: + site = None + + # Conditionally create uri dependent on siteVisitID field. + site_visit_id: str | None = row["siteVisitID"] + if site_visit_id: + # Create TERN.SiteVisit subject IRI - Note this needs to match the iri construction of the + # site visit template mapping, ensuring they will resolve properly. 
+ site_visit = utils.iri_patterns.site_visit_iri(base_iri, site_visit_id) + else: + site_visit = None + + # Add Provider Identified By + self.add_provider_identified( + uri=provider_identified, + row=row, + graph=graph, + ) + + # Add Provider Recorded By + self.add_provider_recorded( + uri=provider_recorded_by, + row=row, + graph=graph, + ) + + # Add record number datatype + self.add_record_number_datatype( + uri=record_number_datatype, + provider=provider_recorded_by, + row=row, + graph=graph, + ) + + # Add provider provider agent + self.add_provider_recorded_by_agent( + uri=provider_recorded_by, + row=row, + graph=graph, + ) + + # Add owner record id datatype + self.add_record_id_datatype( + uri=owner_record_id_datatype, + attribution=owner_record_id_attribution, + value=owner_record_id_source, + graph=graph, + ) + + # Add attribution for record id datatype + self.add_attribution( + uri=owner_record_id_attribution, + provider=owner_record_id_provider, + provider_role_type=DATA_ROLE_OWNER, + graph=graph, + ) + + # Add the provider owner record id + self.add_owner_record_id_provider( + uri=owner_record_id_provider, + row=row, + graph=graph, + ) + + # Add provider record ID datatype + self.add_record_id_datatype( + uri=provider_record_id_datatype, + attribution=provider_record_id_attribution, + value=provider_record_id_source, + graph=graph, + ) + + # Add provider record ID attribution + self.add_attribution( + uri=provider_record_id_attribution, + provider=provider_record_id_agent, + provider_role_type=DATA_ROLE_RESOURCE_PROVIDER, + graph=graph, + ) + + # Add provider agent + self.add_provider_record_id_agent( + uri=provider_record_id_agent, + row=row, + graph=graph, + ) + + # Add Sample Specimen + self.add_sample_specimen( + uri=sample_specimen, + row=row, + dataset=dataset, + sampling_specimen=sampling_specimen, + provider_record_id_occurrence=provider_record_id_occurrence, + graph=graph, + base_iri=base_iri, + ) + + # Add catalog number datatype + 
self.add_catalog_number_datatype( + uri=catalog_number_datatype, + provider=catalog_number_provider, + value=catalog_number_source, + graph=graph, + ) + + # Add catalog number provider + self.add_catalog_number_provider( + uri=catalog_number_provider, + row=row, + graph=graph, + ) + + # Add other catalog numbers datatype + self.add_catalog_number_datatype( + uri=other_catalog_numbers_datatype, + provider=other_catalog_numbers_provider, + value=other_catalog_numbers_source, + graph=graph, + ) + + # Add other catalog numbers provider + self.add_other_catalog_numbers_provider( + uri=other_catalog_numbers_provider, + row=row, + graph=graph, + ) + + # Add Sampling Specimen + self.add_sampling_specimen( + uri=sampling_specimen, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + sample_specimen=sample_specimen, + site_id_geometry_map=site_id_geometry_map, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Text for Scientific Name + self.add_text_scientific_name( + uri=text_scientific_name, + dataset=dataset, + row=row, + graph=graph, + ) + + # Add Identification Qualifier Attribute + self.add_id_qualifier_attribute( + uri=id_qualifier_attribute, + id_qualifier=id_qualifier, + id_qualifier_value=id_qualifier_value, + dataset=dataset, + graph=graph, + ) + + # Add Identification Qualifier Value + self.add_id_qualifier_value( + uri=id_qualifier_value, + id_qualifier=id_qualifier, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Identification Qualifier Collection + self.add_id_qualifier_collection( + uri=id_qualifier_collection, + id_qualifier=id_qualifier, + id_qualifier_attribute=id_qualifier_attribute, + observation_scientific_name=observation_scientific_name, + dataset=dataset, + graph=graph, + ) + + # Add Identification Remarks Attribute + self.add_id_remarks_attribute( + uri=id_remarks_attribute, + id_remarks=id_remarks, + id_remarks_value=id_remarks_value, + 
dataset=dataset, + graph=graph, + ) + + # Add Identification Remarks Value + self.add_id_remarks_value( + uri=id_remarks_value, + id_remarks=id_remarks, + graph=graph, + ) + + # Add identification Remarks collection + self.add_id_remarks_collection( + uri=id_remarks_collection, + id_remarks=id_remarks, + id_remarks_attribute=id_remarks_attribute, + observation_scientific_name=observation_scientific_name, + dataset=dataset, + graph=graph, + ) + + # Add Text for Verbatim ID + self.add_text_verbatim_id( + uri=text_verbatim_id, + row=row, + graph=graph, + ) + + # Add Observation for Scientific Name + self.add_observation_scientific_name( + uri=observation_scientific_name, + row=row, + dataset=dataset, + provider=provider_identified, + provider_record_id_occurrence=provider_record_id_occurrence, + scientific_name=text_scientific_name, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + base_iri=base_iri, + ) + + # Add Observation for Verbatim ID + self.add_observation_verbatim_id( + uri=observation_verbatim_id, + row=row, + dataset=dataset, + provider=provider_identified, + provider_record_id_occurrence=provider_record_id_occurrence, + sample_specimen=sample_specimen, + verbatim_id=text_verbatim_id, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + base_iri=base_iri, + ) + + # Add Data Generalizations Attribute + self.add_data_generalizations_attribute( + uri=data_generalizations_attribute, + data_generalizations=data_generalizations, + dataset=dataset, + data_generalizations_value=data_generalizations_value, + graph=graph, + ) + + # Add Data Generalizations Value + self.add_data_generalizations_value( + uri=data_generalizations_value, + data_generalizations=data_generalizations, + graph=graph, + ) + + # Add Data Generalizations Sample Collection + self.add_data_generalizations_collection( + uri=data_generalizations_collection, + data_generalizations=data_generalizations, + 
data_generalizations_attribute=data_generalizations_attribute, + provider_record_id_occurrence=provider_record_id_occurrence, + dataset=dataset, + graph=graph, + ) + + # Add Taxon Rank Attribute + self.add_taxon_rank_attribute( + uri=taxon_rank_attribute, + taxon_rank=taxon_rank, + dataset=dataset, + taxon_rank_value=taxon_rank_value, + graph=graph, + ) + + # Add Taxon Rank Value + self.add_taxon_rank_value( + uri=taxon_rank_value, + taxon_rank=taxon_rank, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Taxon Rank collection + self.add_taxon_rank_collection( + uri=taxon_rank_collection, + taxon_rank=taxon_rank, + taxon_rank_attribute=taxon_rank_attribute, + observation_scientific_name=observation_scientific_name, + dataset=dataset, + graph=graph, + ) + + # Add Individual Count Observation + self.add_individual_count_observation( + uri=individual_count_observation, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + individual_count_value=individual_count_value, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Individual Count Value + self.add_individual_count_value( + uri=individual_count_value, + row=row, + graph=graph, + ) + + # Add Organism Remarks Observation + self.add_organism_remarks_observation( + uri=organism_remarks_observation, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + organism_remarks_value=organism_remarks_value, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Organism Remarks Value + self.add_organism_remarks_value( + uri=organism_remarks_value, + row=row, + graph=graph, + ) + + # Add Habitat Attribute + self.add_habitat_attribute( + uri=habitat_attribute, + habitat=habitat, + dataset=dataset, + habitat_value=habitat_value, + graph=graph, + ) + + # Add Habitat Value + self.add_habitat_value( + uri=habitat_value, + habitat=habitat, + dataset=dataset, + 
graph=graph, + base_iri=base_iri, + ) + + # Add habitat attribute sample collection + self.add_habitat_collection( + uri=habitat_collection, + habitat=habitat, + habitat_attribute=habitat_attribute, + provider_record_id_occurrence=provider_record_id_occurrence, + dataset=dataset, + graph=graph, + ) + + # Add Basis of Record Attribute + self.add_basis_attribute( + uri=basis_attribute, + basis_of_record=basis_of_record, + dataset=dataset, + basis_value=basis_value, + graph=graph, + ) + + # Add Basis of Record Value + self.add_basis_value( + uri=basis_value, + basis_of_record=basis_of_record, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Basis of Record Sample Collection + self.add_basis_collection( + uri=basis_collection, + basis_of_record=basis_of_record, + basis_attribute=basis_attribute, + sample_specimen=sample_specimen, + provider_record_id_occurrence=provider_record_id_occurrence, + row=row, + dataset=dataset, + graph=graph, + ) + + # Add Owner Institution Provider + self.add_owner_institution_provider( + uri=owner_record_id_provider, + row=row, + graph=graph, + ) + + # Add Occurrence Status Observation + self.add_occurrence_status_observation( + uri=occurrence_status_observation, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + occurrence_status_value=occurrence_status_value, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Occurrence Status Value + self.add_occurrence_status_value( + uri=occurrence_status_value, + row=row, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Preparations Attribute + self.add_preparations_attribute( + uri=preparations_attribute, + preparations=preparations, + dataset=dataset, + preparations_value=preparations_value, + graph=graph, + ) + + # Add Preparations Value + self.add_preparations_value( + uri=preparations_value, + preparations=preparations, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + 
+ # Add Preparations attribute Sample Collection + self.add_preparations_collection( + uri=preparations_collection, + preparations=preparations, + preparations_attribute=preparations_attribute, + sample_specimen=sample_specimen, + dataset=dataset, + graph=graph, + ) + + # Add Establishment Means Observation + self.add_establishment_means_observation( + uri=establishment_means_observation, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + establishment_means_value=establishment_means_value, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Establishment Means Value + self.add_establishment_means_value( + uri=establishment_means_value, + row=row, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Life Stage Observation + self.add_life_stage_observation( + uri=life_stage_observation, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + sample_specimen=sample_specimen, + life_stage_value=life_stage_value, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Life Stage Value + self.add_life_stage_value( + uri=life_stage_value, + row=row, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Sex Observation + self.add_sex_observation( + uri=sex_observation, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + sample_specimen=sample_specimen, + sex_value=sex_value, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Sex Value + self.add_sex_value( + uri=sex_value, + row=row, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Reproductive Condition Observation + self.add_reproductive_condition_observation( + uri=reproductive_condition_observation, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + sample_specimen=sample_specimen, + 
reproductive_condition_value=reproductive_condition_value, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Reproductive Condition Value + self.add_reproductive_condition_value( + uri=reproductive_condition_value, + row=row, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Accepted Name Usage Observation + self.add_accepted_name_usage_observation( + uri=accepted_name_usage_observation, + row=row, + dataset=dataset, + scientific_name=text_scientific_name, + accepted_name_usage_value=accepted_name_usage_value, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add Accepted Name Usage Value + self.add_accepted_name_usage_value( + uri=accepted_name_usage_value, + dataset=dataset, + row=row, + graph=graph, + ) + + # Add Sampling Sequencing + self.add_sampling_sequencing( + uri=sampling_sequencing, + row=row, + dataset=dataset, + feature_of_interest=sample_specimen, + sample_sequence=sample_sequence, + site_id_geometry_map=site_id_geometry_map, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + base_iri=base_iri, + ) + + # Add Sample Sequence + self.add_sample_sequence( + uri=sample_sequence, + row=row, + dataset=dataset, + feature_of_interest=sample_specimen, + sampling_sequencing=sampling_sequencing, + graph=graph, + ) + + # Add Provider Threat Status Determined By + self.add_provider_determined_by( + uri=provider_determined_by, + row=row, + graph=graph, + ) + + # Add Threat Status Observation + self.add_threat_status_observation( + uri=threat_status_observation, + row=row, + dataset=dataset, + provider_record_id_occurrence=provider_record_id_occurrence, + threat_status_value=threat_status_value, + determined_by=provider_determined_by, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + base_iri=base_iri, + ) + + # Add Threat Status Value + self.add_threat_status_value( + uri=threat_status_value, + row=row, + dataset=dataset, + 
graph=graph, + base_iri=base_iri, + ) + + # Add Conservation Authority Attribute + self.add_conservation_authority_attribute( + uri=conservation_authority_attribute, + conservation_authority=conservation_authority, + conservation_authority_value=conservation_authority_value, + dataset=dataset, + graph=graph, + ) + + # Add Conservation Authority Value + self.add_conservation_authority_value( + uri=conservation_authority_value, + conservation_authority=conservation_authority, + graph=graph, + ) + + # Add conservation Authority Collection + self.add_conservation_authority_collection( + uri=conservation_authority_collection, + conservation_authority=conservation_authority, + conservation_authority_attribute=conservation_authority_attribute, + threat_status_observation=threat_status_observation, + dataset=dataset, + graph=graph, + ) + + # Add organism quantity observation + self.add_organism_quantity_observation( + uri=organism_quantity_observation, + provider_record_id_occurrence=provider_record_id_occurrence, + dataset=dataset, + row=row, + site_visit_id_temporal_map=site_visit_id_temporal_map, + graph=graph, + ) + + # Add organism quantity value + self.add_organism_quantity_value( + uri=organism_quantity_value, + organism_qty_observation=organism_quantity_observation, + dataset=dataset, + row=row, + graph=graph, + base_iri=base_iri, + ) + + # Add site + self.add_site( + uri=site, + dataset=dataset, + graph=graph, + ) + + # Add Sensitivity Category Attribute + self.add_sensitivity_category_attribute( + uri=sensitivity_category_attribute, + row=row, + dataset=dataset, + sensitivity_category_value=sensitivity_category_value, + graph=graph, + ) + + # Add Sensitivity Category Value + self.add_sensitivity_category_value( + uri=sensitivity_category_value, + row=row, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add Sensitivity Category Collection + self.add_sensitivity_category_collection( + uri=sensitivity_category_collection, + 
sensitivity_category=sensitivity_category, + sensitivity_category_attribute=sensitivity_category_attribute, + provider_record_id_biodiversity_record=provider_record_id_biodiversity_record, + dataset=dataset, + graph=graph, + ) + + # Add Survey + self.add_survey( + uri=survey, + dataset=dataset, + graph=graph, + ) + + # Add biodiversity record + self.add_biodiversity_record( + uri=provider_record_id_biodiversity_record, + provider_record_id_datatype=provider_record_id_datatype, + provider_record_id_occurrence=provider_record_id_occurrence, + dataset=dataset, + row=row, + graph=graph, + ) + + # Add occurrence + self.add_occurrence( + uri=provider_record_id_occurrence, + record_number_datatype=record_number_datatype, + owner_record_id_datatype=owner_record_id_datatype, + other_catalog_numbers_datatype=other_catalog_numbers_datatype, + catalog_number_datatype=catalog_number_datatype, + provider_recorded_by=provider_recorded_by, + survey=survey, + site=site, + site_visit=site_visit, + dataset=dataset, + site_id_geometry_map=site_id_geometry_map, + row=row, + graph=graph, + base_iri=base_iri, + ) + + # Add site visit + self.add_site_visit( + uri=site_visit, + dataset=dataset, + graph=graph, + ) + + # Add extra fields JSON + self.add_extra_fields_json( + subject_uri=provider_record_id_occurrence, + row=row, + graph=graph, + extra_schema=extra_schema, + ) + + def add_provider_identified( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Identified By Provider to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. 
+ row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # Check for identifiedBy + if not row["identifiedBy"]: + return + + # Add to Graph + graph.add((uri, a, rdflib.PROV.Agent)) + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["identifiedBy"]))) + + def add_provider_recorded( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Recorded By Provider to the Graph + + Args: + uri (rdflib.URIRef | None): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # Check for valid subject and data + if not row["recordedBy"] or uri is None: + return + + # Add to Graph + graph.add((uri, a, rdflib.PROV.Agent)) + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["recordedBy"]))) + + def add_default_temporal_entity( + self, + uri: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> rdflib.term.Node | None: + """Adds a default temporal entity BNode to the graph. + + Args: + uri (rdflib.URIRef): The subject that the temporal + entity will belong. + site_visit_id_temporal_map (dict[str, str] | None): The + map containing serialized rdf of default + temporal entity. + row (frictionless.Row): Raw data. + graph (rdflib.Graph): The graph to be modified. + + Returns: + rdflib.BNode | None: Reference to the top level blank node of the + temporal entity or None + """ + # Check to see map provided + if not site_visit_id_temporal_map: + return None + + # Create graph from supplied rdf + temp_graph = rdflib.Graph().parse(data=site_visit_id_temporal_map[row["siteVisitID"]]) + + # Obtain reference to subject node + top_node = next(temp_graph.subjects(a, rdflib.TIME.TemporalEntity)) + + # Merge with main graph using addition assignment (modify inplace). 
+ # NOTE: Be aware that BNode IDs are not modified or checked during this process + # and there are risks of name collision during merging. If blank nodes are ever + # assigned names manually in future, then that may impact this operation + # Refer to https://rdflib.readthedocs.io/en/stable/merging.html for more information. + graph += temp_graph + + # Add hasTime property to uri node + graph.add((uri, rdflib.TIME.hasTime, top_node)) + + # Return reference to TemporalEntity + return top_node + + def add_observation_scientific_name( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + scientific_name: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Observation Scientific Name to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider (rdflib.URIRef): Provider associated with this node + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + scientific_name (rdflib.URIRef): Scientific Name associated with + this node + site_visit_id_temporal_map (dict[str, str] | None): Map + of site visit ids to default temporal entity to use if requlred. 
+ graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Get Timestamps + date_identified: models.temporal.Timestamp | None = row["dateIdentified"] or row["eventDateStart"] + + # Retrieve vocab for field + vocab = self.fields()["identificationMethod"].get_flexible_vocab() + + # Retrieve Vocab or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["identificationMethod"]) + + # Add to Graph + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("scientificName-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, provider_record_id_occurrence)) + graph.add((uri, rdflib.SOSA.hasResult, scientific_name)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["scientificName"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_TAXON)) + + # Check for date provided within given template + # Declare temporal entity + temporal_entity: rdflib.term.Node | None = None + if date_identified is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) + # Check for which date provided + if not row["dateIdentified"] and row["eventDateStart"]: + # Add comment to temporal entity + comment = "Date unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Add default temporal entity from map + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, 
site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Check for identifiedBy + if row["identifiedBy"]: + graph.add((uri, rdflib.PROV.wasAssociatedWith, provider)) + + def add_observation_verbatim_id( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + sample_specimen: rdflib.URIRef, + verbatim_id: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Observation Verbatim ID to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider (rdflib.URIRef): Provider associated with this node + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + sample_specimen (rdflib.URIRef): Sample Specimen associated with + this node + verbatim_id (rdflib.URIRef): Verbatim ID associated with this node + site_visit_id_temporal_map (dict[str, str] | None): Map of site + visit ids to default temporal entity rdf. 
+ graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check for verbatimIdentification + if not row["verbatimIdentification"]: + return + + # Get Timestamp + date_identified: models.temporal.Timestamp | None = row["dateIdentified"] or row["eventDateStart"] + + # Choose Feature of Interest + # The Feature of Interest is the Specimen Sample if it is determined + # that this row has a specimen, otherwise it is Field Sample + foi = sample_specimen if has_specimen(row) else provider_record_id_occurrence + + # Retrieve vocab for field + vocab = self.fields()["identificationMethod"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["identificationMethod"]) + + # Add to Graph + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("verbatimID-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, foi)) + graph.add((uri, rdflib.SOSA.hasResult, verbatim_id)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["verbatimIdentification"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_TAXON)) + + # Declare temporal entity allowing for correct type assignments + temporal_entity: rdflib.term.Node | None = None + # Check to see if date provided from own template + if date_identified is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) + # Check for dateIdentified + if not row["dateIdentified"]: + # Add comment to temporal entity + comment = "Date unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, 
rdflib.Literal(comment))) + else: + # Add default temporal entity from map + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Check for identifiedBy + if row["identifiedBy"]: + graph.add((uri, rdflib.PROV.wasAssociatedWith, provider)) + + def add_provider_recorded_by_agent( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds the provider agent to the graph. + + Args: + uri (rdflib.URIRef | None): Subject of the node. + row (frictionless.Row): Raw data. + graph (rdflib.Graph): Graph to be modified. + """ + # Ensure data and URI passed in + if uri is None or not row["recordedBy"]: + return + + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + + # Add name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["recordedBy"]))) + + def add_record_id_datatype( + self, + uri: rdflib.URIRef | None, + attribution: rdflib.URIRef | None, + value: str | None, + graph: rdflib.Graph, + ) -> None: + """Adds the owner record id datatype to the graph. + + Args: + uri (rdflib.URIRef | None): Subject of the node or None. + attribution (rdflib.URIRef | None): Attribution of the datatype or None. + value (str | None): Raw value provided for the record id source. + graph (rdflid.Graph): Graph to be modified. 
+ """ + # Check to see subject provided + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.RDFS.Datatype)) + + # Add label + graph.add((uri, rdflib.SKOS.prefLabel, rdflib.Literal(f"{value} recordID"))) + graph.add((uri, rdflib.SKOS.definition, rdflib.Literal("An identifier for the record"))) + + # Add attribution + if attribution is not None: + graph.add((uri, rdflib.PROV.qualifiedAttribution, attribution)) + + def add_attribution( + self, + uri: rdflib.URIRef | None, + provider: rdflib.URIRef | None, + provider_role_type: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds an attribution node to the graph. + + Args: + uri (rdflib.URIRef | None): Subject of the node or None. + provider (rdflib.URIRef | None): Provider of the datatype or None. + provider_role_type (rdflib.URIRef): Role type of provider. + graph (rdflid.Graph): Graph to be modified. + """ + # Check to see subject provided. + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.PROV.Attribution)) + + if provider is not None: + graph.add((uri, rdflib.PROV.agent, provider)) + + graph.add((uri, rdflib.PROV.hadRole, provider_role_type)) + + def add_owner_record_id_provider( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds the provider owner record id node. + Args: + uri (rdflib.URIRef): Subject of the node. + row (frictionless.Row): Raw data. + graph (rdflib.Graph): Graph to be modified.: + """ + # Check that a subject uri was supplied + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + + # Add name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["ownerRecordIDSource"]))) + + def add_provider_record_id_agent( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds provider record id agent to the graph. + Args: + uri (rdflib.URIRef): Subject of the node. + row (frictionless.Row): Raw data. 
+ graph (rdflib.Graph): Graph to be modified. + """ + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + + # Add name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["providerRecordIDSource"]))) + + def add_id_qualifier_attribute( + self, + *, + uri: rdflib.URIRef | None, + id_qualifier: str | None, + id_qualifier_value: rdflib.URIRef | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds Identification Qualifier Attribute to the Graph + + Args: + uri: URI to use for this node. + id_qualifier: identificationQualifier value from the template + id_qualifier_value: Identification Qualifier Value associated with this node. + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + """ + # Check identificationQualifier + if uri is None: + return + + # Identification Qualifier Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_ID_UNCERTAINTY)) + if id_qualifier: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(id_qualifier))) + if id_qualifier_value: + graph.add((uri, utils.namespaces.TERN.hasValue, id_qualifier_value)) + + def add_id_qualifier_value( + self, + *, + uri: rdflib.URIRef | None, + id_qualifier: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Identification Qualifier Value to the Graph + + Args: + uri: URI to use for this node. 
+ id_qualifier: identificationQualifier value from the template + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check node should be created + if uri is None: + return + + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + if id_qualifier: + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(id_qualifier))) + + # Retrieve vocab for field + vocab = self.fields()["identificationQualifier"].get_flexible_vocab() + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(id_qualifier) + # Identification Qualifier Value + graph.add((uri, rdflib.RDF.value, term)) + + def add_id_qualifier_collection( + self, + *, + uri: rdflib.URIRef | None, + id_qualifier: str | None, + id_qualifier_attribute: rdflib.URIRef | None, + observation_scientific_name: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a identification qualifier Collection to the graph + + Args: + uri: The uri for the Collection. + id_qualifier: identificationQualifier value from template. + id_qualifier_attribute: The uri for the attribute node. + observation_scientific_name: The node that should be a member of the collection. + dataset: The uri for the dateset node. + graph: The graph. 
+ """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if id_qualifier: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Occurrence Collection - Identification Qualifier - {id_qualifier}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the scientific name observation node + graph.add((uri, rdflib.SDO.member, observation_scientific_name)) + # Add link to attribute + if id_qualifier_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, id_qualifier_attribute)) + + def add_id_remarks_attribute( + self, + *, + uri: rdflib.URIRef | None, + id_remarks: str | None, + id_remarks_value: rdflib.URIRef | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds Identification Remarks Attribute to the Graph + + Args: + uri: URI to use for this node. + id_remarks: identificationRemarks value from the template + id_remarks_value: Identification Remarks Value associated with this node + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + """ + # Check identificationRemarks + if uri is None: + return + + # Identification Remarks Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_ID_REMARKS)) + if id_remarks: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(id_remarks))) + if id_remarks_value: + graph.add((uri, utils.namespaces.TERN.hasValue, id_remarks_value)) + + def add_id_remarks_value( + self, + *, + uri: rdflib.URIRef | None, + id_remarks: str | None, + graph: rdflib.Graph, + ) -> None: + """Adds Identification Remarks Value to the Graph + + Args: + uri: URI to use for this node + id_remarks: identificationRemarks value from the template + graph (rdflib.Graph): Graph to add to + """ + # Check identificationRemarks + if uri is None: + return + + 
# Identification Remarks Value + graph.add((uri, a, utils.namespaces.TERN.Text)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDF.value, rdflib.Literal(id_remarks))) + + def add_id_remarks_collection( + self, + *, + uri: rdflib.URIRef | None, + id_remarks: str | None, + id_remarks_attribute: rdflib.URIRef | None, + observation_scientific_name: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a identification remarks Collection to the graph + + Args: + uri: The uri for the Collection. + id_remarks: identificationRemarks value from template + id_remarks_attribute: The uri for the attribute node. + observation_scientific_name: The node that should be a member of the collection. + dataset: The uri for the dateset node. + graph: The graph. + """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if id_remarks: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Occurrence Collection - Identification Remarks - {id_remarks}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the scientific name observation node + graph.add((uri, rdflib.SDO.member, observation_scientific_name)) + # Add link to attribute + if id_remarks_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, id_remarks_attribute)) + + def add_text_scientific_name( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Text Scientific Name to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. 
+ dataset (rdflib.URIRef): Dataset this belongs to + row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # Add to Graph + graph.add((uri, a, utils.namespaces.TERN.Text)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("scientificName"))) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDF.value, rdflib.Literal(row["scientificName"]))) + graph.add((uri, utils.namespaces.TERN.featureType, CONCEPT_SCIENTIFIC_NAME)) + + def add_catalog_number_datatype( + self, + uri: rdflib.URIRef | None, + provider: rdflib.URIRef | None, + value: str | None, + graph: rdflib.Graph, + ) -> None: + """Adds catalog number datatype to the graph. + Args: + uri (rdflib.URIRef | None): Subject of the node. + provider (rdflib.URIRef | None): Corresponding provider. + value (str | None): Catalog number source name obtained from raw data. + graph (rdflib.Graph): Graph to be modified. + """ + # Check subject was provided + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.RDFS.Datatype)) + + # Add label + if value is not None: + graph.add((uri, rdflib.SKOS.prefLabel, rdflib.Literal(f"{value} catalogNumber"))) + + # Add definition + graph.add((uri, rdflib.SKOS.definition, rdflib.Literal("A catalog number for the sample"))) + + # Add attribution + if provider is not None: + graph.add((uri, rdflib.PROV.wasAttributedTo, provider)) + + def add_catalog_number_provider( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds the catalog number provider to the graph. + Args: + uri (rdflib.URIRef | None): Subject of the node. + row (frictionlee.Row): Raw data. + graph (rdflib.Graph): Graph to be modified. 
+ """ + # Check subject was provided + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + + # Add name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["catalogNumberSource"]))) + + def add_other_catalog_numbers_provider( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds other catalog numbers provider to the graph. + Args: + uri (rdflib.URIRef | None): Subject of the node. + row (frictionless.Row): Raw data. + graph (rdflib.Graph): Graph to be modified. + """ + # Check that subject was provided + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + + # Add name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["otherCatalogNumbersSource"]))) + + def add_sampling_specimen( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + sample_specimen: rdflib.URIRef, + site_id_geometry_map: dict[str, str] | None, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Sampling Specimen to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + sample_specimen (rdflib.URIRef): Sample Specimen associated with + this node + site_id_geometry_map (dict[str, str] | None): Map with default wkt + string for a given site id. + site_visit_id_temporal_map (dict[str, str] | None): Map with default + rdf string for a given site visit id. 
+ graph (rdflib.Graph): Graph to add to + """ + # Check if Row has a Specimen + if not has_specimen(row): + return + + # Extract values + latitude = row["decimalLatitude"] + longitude = row["decimalLongitude"] + geodetic_datum = row["geodeticDatum"] + site_id = row["siteID"] + + # Check to see if lat long provided + if latitude is not None and longitude is not None: + # Create geometry + geometry = models.spatial.Geometry( + raw=models.spatial.LatLong(latitude, longitude), + datum=geodetic_datum, + ) + + # If not then use default geometry map + elif site_id_geometry_map is not None and (default_geometry := site_id_geometry_map.get(site_id)) is not None: + # Create geometry from geosparql wkt literal + geometry = models.spatial.Geometry.from_geosparql_wkt_literal(default_geometry) + + else: + # Should not reach here since validated data provided, however if + # it does come to it the corresponding node will be omitted + return + + # Get Timestamp + timestamp: models.temporal.Timestamp | None = row["preparedDate"] or row["eventDateStart"] + + # Add to Graph + graph.add((uri, a, utils.namespaces.TERN.Sampling)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("specimen-sampling"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, provider_record_id_occurrence)) + graph.add((uri, rdflib.SOSA.hasResult, sample_specimen)) + graph.add((uri, rdflib.SOSA.usedProcedure, CONCEPT_PROCEDURE_SAMPLING)) + + # Declare temporal entity allowing for correct assignment types + temporal_entity: rdflib.term.Node | None = None + # Check to see date already found + if timestamp is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, timestamp.rdf_in_xsd, timestamp.to_rdf_literal())) + # Check for preparedDate + if not row["preparedDate"]: + # Add comment to temporal entity + temporal_comment = "Date 
unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(temporal_comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add geometry + geometry_node = rdflib.BNode() + graph.add((uri, utils.namespaces.GEO.hasGeometry, geometry_node)) + graph.add((geometry_node, a, utils.namespaces.GEO.Geometry)) + graph.add((geometry_node, utils.namespaces.GEO.asWKT, geometry.to_transformed_crs_rdf_literal())) + + self.add_geometry_supplied_as( + subj=uri, + pred=utils.namespaces.GEO.hasGeometry, + obj=geometry_node, + geom=geometry, + graph=graph, + ) + + # Add comment to geometry + spatial_comment = "Location unknown, location of field sampling used as proxy" + graph.add((geometry_node, rdflib.RDFS.comment, rdflib.Literal(spatial_comment))) + + # Check for coordinateUncertaintyInMeters + if row["coordinateUncertaintyInMeters"]: + # Add Spatial Accuracy + accuracy = rdflib.Literal(row["coordinateUncertaintyInMeters"], datatype=rdflib.XSD.double) + graph.add((uri, utils.namespaces.GEO.hasMetricSpatialAccuracy, accuracy)) + + def add_text_verbatim_id( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Text Verbatim ID to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. 
+ row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # Check for verbatimIdentification + if not row["verbatimIdentification"]: + return + + # Add to Graph + graph.add((uri, a, utils.namespaces.TERN.Text)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDF.value, rdflib.Literal(row["verbatimIdentification"]))) + + def add_record_number_datatype( + self, + uri: rdflib.URIRef | None, + provider: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds record number datatype to the graph. + Args: + uri (rdflib.URIRef | None): The subject of the node + or None if uri wasn't created. + provider (rdflib.URIRef | None): The corresponding + provider uri. + row (frictionless.Row): Row from the template. + graph (rdflib.Graph): Graph to be modified. + """ + # Check subject provided + if uri is None: + return + + # if no recordNumber, don't create this datatype because it would be unused. + if not row["recordNumber"]: + return + + # Add type + graph.add((uri, a, rdflib.RDFS.Datatype)) + + # Add label + if recorded_by := row["recordedBy"]: + graph.add((uri, rdflib.SKOS.prefLabel, rdflib.Literal(f"{recorded_by} recordNumber"))) + + # Add definition + graph.add( + ( + uri, + rdflib.SKOS.definition, + rdflib.Literal( + "The record number of the original observation from the original observer of the organism" + ), + ) + ) + + # Add attribution + if provider is not None: + graph.add((uri, rdflib.PROV.wasAttributedTo, provider)) + + def add_sample_specimen( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + sampling_specimen: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Sample Specimen to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. 
+ row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + sampling_specimen (rdflib.URIRef): Sampling Specimen associated + with this node + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check if Row has a Specimen + if not has_specimen(row): + return + + # Retrieve vocab for field (multiple exists for kingdom) + vocab = self.fields()["kingdom"].get_flexible_vocab("KINGDOM_SPECIMEN") + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["kingdom"]) + + # Add to Graph + graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) + graph.add((uri, a, utils.namespaces.TERN.Sample)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("specimen-sample"))) + graph.add((uri, rdflib.SOSA.isResultOf, sampling_specimen)) + graph.add((uri, rdflib.SOSA.isSampleOf, provider_record_id_occurrence)) + graph.add((uri, utils.namespaces.TERN.featureType, term)) + + def add_data_generalizations_attribute( + self, + uri: rdflib.URIRef | None, + data_generalizations: str | None, + dataset: rdflib.URIRef, + data_generalizations_value: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds Data Generalizations Attribute to the Graph + + Args: + uri: URI to use for this node. 
+ data_generalizations: dataGeneralizations value from the CSV + dataset: Dataset this belongs to + data_generalizations_value: Data Generalizations Value associated with this node + graph: Graph to add to + """ + # Check Existence + if uri is None: + return + + # Data Generalizations Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_DATA_GENERALIZATIONS)) + if data_generalizations: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(data_generalizations))) + if data_generalizations_value is not None: + graph.add((uri, utils.namespaces.TERN.hasValue, data_generalizations_value)) + + def add_data_generalizations_value( + self, + uri: rdflib.URIRef | None, + data_generalizations: str | None, + graph: rdflib.Graph, + ) -> None: + """Adds Data Generalizations Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + data_generalizations: dataGeneralizations value from the CSV + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if uri is None: + return + + # Data Generalizations Value + graph.add((uri, a, utils.namespaces.TERN.Text)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDF.value, rdflib.Literal(data_generalizations))) + + def add_data_generalizations_collection( + self, + uri: rdflib.URIRef | None, + data_generalizations: str | None, + data_generalizations_attribute: rdflib.URIRef | None, + provider_record_id_occurrence: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a data generalizations attribute Sample Collection to the graph + + Args: + uri: The uri for the SampleCollection. + data_generalizations: dataGeneralizations value from template. + data_generalizations_attribute: The uri for the attribute node. 
+ provider_record_id_occurrence: Occurrence associated with this + node + dataset: The uri for the dateset node. + graph: The graph. + """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if data_generalizations: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal( + f"Occurrence Collection - Data Generalizations - {data_generalizations}", + ), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the sample field + graph.add((uri, rdflib.SDO.member, provider_record_id_occurrence)) + # Add link to attribute + if data_generalizations_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, data_generalizations_attribute)) + + def add_taxon_rank_attribute( + self, + *, + uri: rdflib.URIRef | None, + taxon_rank: str | None, + taxon_rank_value: rdflib.URIRef | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds Taxon Rank Attribute to the Graph + + Args: + uri: URI to use for this node. + taxon_rank: taxonRank value from the template. 
+ taxon_rank_value: Taxon Rank Value associated with this node + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if uri is None: + return + + # Taxon Rank Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_TAXON_RANK)) + if taxon_rank: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(taxon_rank))) + if taxon_rank_value: + graph.add((uri, utils.namespaces.TERN.hasValue, taxon_rank_value)) + + def add_taxon_rank_value( + self, + *, + uri: rdflib.URIRef | None, + taxon_rank: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Taxon Rank Value to the Graph + + Args: + uri: URI to use for this node + taxon_rank: taxonRank value from the template. + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if uri is None: + return + + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + if taxon_rank: + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(taxon_rank))) + + # Retrieve vocab for field + vocab = self.fields()["taxonRank"].get_flexible_vocab() + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(taxon_rank) + # Taxon Rank Value + graph.add((uri, rdflib.RDF.value, term)) + + def add_taxon_rank_collection( + self, + *, + uri: rdflib.URIRef | None, + taxon_rank: str | None, + taxon_rank_attribute: rdflib.URIRef | None, + observation_scientific_name: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a taxon rank Collection to the graph + + Args: + uri: The uri for the Collection. + taxon_rank: taxonRank value from template. 
+ taxon_rank_attribute: The uri for the attribute node. + observation_scientific_name: The node that should be a member of the collection. + dataset: The uri for the dateset node. + graph: The graph. + """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if taxon_rank: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Occurrence Collection - Taxon Rank - {taxon_rank}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the scientific name observation node + graph.add((uri, rdflib.SDO.member, observation_scientific_name)) + # Add link to attribute + if taxon_rank_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, taxon_rank_attribute)) + + def add_individual_count_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + individual_count_value: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Individual Count Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + individual_count_value (rdflib.URIRef): Individual Count Value + associated with this node + site_visit_id_temporal_map (dict[str, str] | None): Map providing a + default temporal entity rdf for a site visit id. 
+ graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["individualCount"]: + return + + # Get Timestamp + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + + # Retrieve Vocab or Create on the Fly + vocab = vocabs.sampling_protocol.HUMAN_OBSERVATION.iri # Always Human Observation + + # Individual Count Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("individualCount-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, provider_record_id_occurrence)) + graph.add((uri, rdflib.SOSA.hasResult, individual_count_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["individualCount"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_INDIVIDUAL_COUNT)) + graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check event date supplied + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + # Add comment + comment = "Date unknown, template eventDateStart used as proxy" + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + comment = "Date unknown, site visit dates used as proxy." 
+ + # ASsert temporal_entity type and add + if temporal_entity is not None: + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add method comment to node + method_comment = "Observation method unknown, 'human observation' used as proxy" + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal(method_comment))) + + def add_individual_count_value( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Individual Count Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["individualCount"]: + return + + # Individual Count Value + graph.add((uri, a, utils.namespaces.TERN.Integer)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("individual-count"))) + graph.add((uri, rdflib.RDF.value, rdflib.Literal(row["individualCount"]))) + + def add_organism_remarks_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + organism_remarks_value: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Organism Remarks Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + organism_remarks_value (rdflib.URIRef): Organism Remarks Value + associated with this node + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to default temporal entity as serialized rdf. 
+ graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["organismRemarks"]: + return + + # Get Timestamp + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + + # Retrieve Vocab or Create on the Fly + vocab = vocabs.sampling_protocol.HUMAN_OBSERVATION.iri # Always Human Observation + + # Organism Remarks Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("organismRemarks-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, provider_record_id_occurrence)) + graph.add((uri, rdflib.SOSA.hasResult, organism_remarks_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["organismRemarks"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_ORGANISM_REMARKS)) + graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check for eventDateStart + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + # Add comment to temporal entity + comment = "Date unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." 
+ graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add method comment to node + method_comment = "Observation method unknown, 'human observation' used as proxy" + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal(method_comment))) + + def add_organism_remarks_value( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Organism Remarks Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["organismRemarks"]: + return + + # Organism Remarks Value + graph.add((uri, a, utils.namespaces.TERN.Text)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("organism-remarks"))) + graph.add((uri, rdflib.RDF.value, rdflib.Literal(row["organismRemarks"]))) + + def add_habitat_attribute( + self, + uri: rdflib.URIRef | None, + habitat: str | None, + dataset: rdflib.URIRef, + habitat_value: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds Habitat Attribute to the Graph + + Args: + uri: URI to use for this node. + habitat: Raw habitat from CSV. 
+ dataset: Dataset this belongs to + habitat_value: Habitat Value associated with this node + graph: Graph to add to + """ + # Check Existence + if uri is None: + return + + # Habitat Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_HABITAT)) + if habitat: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(habitat))) + if habitat_value is not None: + graph.add((uri, utils.namespaces.TERN.hasValue, habitat_value)) + + def add_habitat_value( + self, + uri: rdflib.URIRef | None, + habitat: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Habitat Value to the Graph + + Args: + uri: URI to use for this node + habitat: Habitat from the CSV + dataset: Dataset this belongs to + graph: Graph to add to + base_iri: Namespace used to construct IRIs + """ + # Check Existence + if uri is None: + return + + # Habitat Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + if habitat: + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(habitat))) + + # Retrieve vocab for field + vocab = self.fields()["habitat"].get_flexible_vocab() + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(habitat) + # Add value + graph.add((uri, rdflib.RDF.value, term)) + + def add_habitat_collection( + self, + uri: rdflib.URIRef | None, + habitat: str | None, + habitat_attribute: rdflib.URIRef | None, + provider_record_id_occurrence: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a habitat attribute Sample Collection to the graph + + Args: + uri: The uri for the SampleCollection. + habitat: Habitat value from template. + habitat_attribute: The uri for the attribute node. 
+ provider_record_id_occurrence: Occurrence associated with this + node + dataset: The uri for the dateset node. + graph: The graph. + """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if habitat: + graph.add((uri, rdflib.SDO.name, rdflib.Literal(f"Occurrence Collection - Habitat - {habitat}"))) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the sample field + graph.add((uri, rdflib.SDO.member, provider_record_id_occurrence)) + # Add link to attribute + if habitat_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, habitat_attribute)) + + def add_basis_attribute( + self, + uri: rdflib.URIRef | None, + basis_of_record: str | None, + dataset: rdflib.URIRef, + basis_value: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds Basis of Record Attribute to the Graph + + Args: + uri: URI to use for this node. + basis_of_record: basisOfRecord value from the CSV + dataset: Dataset this belongs to + basis_value: Basis of Record Value associated with this node + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if uri is None: + return + + # Basis of Record Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_BASIS_OF_RECORD)) + if basis_of_record: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(basis_of_record))) + if basis_value: + graph.add((uri, utils.namespaces.TERN.hasValue, basis_value)) + + def add_basis_value( + self, + uri: rdflib.URIRef | None, + basis_of_record: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Basis of Record Value to the Graph + + Args: + uri: URI to use for this node + basis_of_record: basisOfRecord value from the CSV + dataset: Dataset this belongs to + graph: Graph to add to + base_iri: 
Namespace used to construct IRIs + """ + # Check Existence + if uri is None: + return + + # Basis of Record Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + if basis_of_record: + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(basis_of_record))) + + # Retrieve vocab for field + vocab = self.fields()["basisOfRecord"].get_flexible_vocab() + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(basis_of_record) + # Add value + graph.add((uri, rdflib.RDF.value, term)) + + def add_basis_collection( + self, + uri: rdflib.URIRef | None, + basis_of_record: str | None, + basis_attribute: rdflib.URIRef | None, + sample_specimen: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a basisOfRecord attribute Sample Collection to the graph + + Either the sample_specimen node or the provider_record_id_occurrence node + should be a member of this collection, depending on if the row has a specimen. + + Args: + uri: The uri for the SampleCollection. + basis_of_record: basisOfRecord value from template. + basis_attribute: The uri for the attribute node. + sample_specimen: The sample specimen node. + provider_record_id_occurrence: Occurrence associated with this + node + row: The CSV row. + dataset: The uri for the dateset node. + graph: The graph. 
+ """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if basis_of_record: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Occurrence Collection - Basis Of Record - {basis_of_record}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the appropriate sample node + if has_specimen(row): + graph.add((uri, rdflib.SDO.member, sample_specimen)) + else: + graph.add((uri, rdflib.SDO.member, provider_record_id_occurrence)) + # Add link to attribute + if basis_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, basis_attribute)) + + def add_owner_institution_provider( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Owner Institution Provider to the Graph + + Args: + uri (rdflib.URIRef | None): URI to use for this node + row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # TODO -> Retrieve this from a known list of institutions + # Check Existence + if not row["ownerRecordIDSource"] or uri is None: + return + + # Owner Institution Provider + graph.add((uri, a, rdflib.PROV.Agent)) + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["ownerRecordIDSource"]))) + + def add_occurrence_status_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + occurrence_status_value: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Occurrence Status Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. 
+ row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + occurrence_status_value (rdflib.URIRef): Occurrence Status Value + associated with this node + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to default temporal entity as rdf. + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["occurrenceStatus"]: + return + + # Get Timestamp + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + + # Retrieve Vocab or Create on the Fly + vocab = vocabs.sampling_protocol.HUMAN_OBSERVATION.iri # Always Human Observation + + # Occurrence Status Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("occurrenceStatus-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, provider_record_id_occurrence)) + graph.add((uri, rdflib.SOSA.hasResult, occurrence_status_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["occurrenceStatus"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_OCCURRENCE_STATUS)) + graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check event date supplied + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if 
temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add method comment to node + method_comment = "Observation method unknown, 'human observation' used as proxy" + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal(method_comment))) + + def add_occurrence_status_value( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Occurrence Status Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if not row["occurrenceStatus"]: + return + + # Retrieve vocab for field + vocab = self.fields()["occurrenceStatus"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["occurrenceStatus"]) + + # Occurrence Status Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"occurrenceStatus = {row['occurrenceStatus']}"))) + graph.add((uri, rdflib.RDF.value, term)) + + def add_preparations_attribute( + self, + uri: rdflib.URIRef | None, + preparations: str | None, + dataset: rdflib.URIRef, + preparations_value: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds Preparations Attribute to the Graph + + Args: + uri: URI to use for this node. 
+ preparations: preparations value from the CSV + dataset: Dataset this belongs to + preparations_value: Preparations Value associated with this node + graph: Graph to add to + """ + # Check Existence + if uri is None: + return + + # Preparations Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_PREPARATIONS)) + if preparations: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(preparations))) + if preparations_value: + graph.add((uri, utils.namespaces.TERN.hasValue, preparations_value)) + + def add_preparations_value( + self, + uri: rdflib.URIRef | None, + preparations: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Preparations Value to the Graph + + Args: + uri: URI to use for this node + preparations: preparations value from the CSV + dataset: Dataset this belongs to + graph: Graph to add to + base_iri: Namespace used to construct IRIs + """ + # Check Existence + if uri is None: + return + + # Preparations Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + if preparations: + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(preparations))) + + # Retrieve vocab for field + vocab = self.fields()["preparations"].get_flexible_vocab() + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(preparations) + # Add value + graph.add((uri, rdflib.RDF.value, term)) + + def add_preparations_collection( + self, + uri: rdflib.URIRef | None, + preparations: str | None, + preparations_attribute: rdflib.URIRef | None, + sample_specimen: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a preparations attribute Sample Collection to the graph + + Args: + uri: The uri for the SampleCollection. 
+ preparations: preparations value from template. + preparations_attribute: The uri for the attribute node. + sample_specimen: The sample specimen node that should be a member of the collection. + dataset: The uri for the dateset node. + graph: The graph. + """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if preparations: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Occurrence Collection - Preparations - {preparations}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the sample_specimen node + graph.add((uri, rdflib.SDO.member, sample_specimen)) + # Add link to attribute + if preparations_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, preparations_attribute)) + + def add_establishment_means_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + establishment_means_value: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Establishment Means Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + establishment_means_value (rdflib.URIRef): Establishment Means + Value associated with this node + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to default rdf to use for temporal entity. 
+ graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["establishmentMeans"]: + return + + # Get Timestamp + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + + # Retrieve Vocab or Create on the Fly + vocab = vocabs.sampling_protocol.HUMAN_OBSERVATION.iri # Always Human Observation + + # Establishment Means Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("establishmentMeans-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, provider_record_id_occurrence)) + graph.add((uri, rdflib.SOSA.hasResult, establishment_means_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["establishmentMeans"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_ESTABLISHMENT_MEANS)) + graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check eventDateStart supplied + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + # Add comment to temporal entity + comment = "Date unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." 
+ graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add method comment to node + method_comment = "Observation method unknown, 'human observation' used as proxy" + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal(method_comment))) + + def add_establishment_means_value( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Establishment Means Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if not row["establishmentMeans"]: + return + + # Retrieve vocab for field + vocab = self.fields()["establishmentMeans"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["establishmentMeans"]) + + # Establishment Means Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("establishmentMeans-value"))) + graph.add((uri, rdflib.RDF.value, term)) + + def add_life_stage_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + sample_specimen: rdflib.URIRef, + life_stage_value: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Life Stage Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. 
+ row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + sample_specimen (rdflib.URIRef): Sample Specimen associated with + this node + life_stage_value (rdflib.URIRef): Life Stage Value associated with + this node + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to temporal entity rdf default map. + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["lifeStage"]: + return + + # Get Timestamp + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + + # Choose Feature of Interest + # The Feature of Interest is the Specimen Sample if it is determined + # that this row has a specimen, otherwise it is Field Sample + foi = sample_specimen if has_specimen(row) else provider_record_id_occurrence + + # Retrieve Vocab or Create on the Fly + vocab = vocabs.sampling_protocol.HUMAN_OBSERVATION.iri # Always Human Observation + + # Life Stage Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("lifeStage-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, foi)) + graph.add((uri, rdflib.SOSA.hasResult, life_stage_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["lifeStage"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_LIFE_STAGE)) + graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check eventDateStart supplied + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + + # Add comment 
to temporal entity + comment = "Date unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add method comment to node + method_comment = "Observation method unknown, 'human observation' used as proxy" + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal(method_comment))) + + def add_life_stage_value( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Life Stage Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if not row["lifeStage"]: + return + + # Retrieve vocab for field + vocab = self.fields()["lifeStage"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["lifeStage"]) + + # Life Stage Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("lifeStage-value"))) + graph.add((uri, rdflib.RDF.value, term)) + + def add_sex_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + sample_specimen: rdflib.URIRef, + sex_value: rdflib.URIRef, + 
site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Sex Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + sample_specimen (rdflib.URIRef): Sample Specimen associated with + this node + sex_value (rdflib.URIRef): Sex Value associated with this node + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to default temporal entity rdf. + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["sex"]: + return + + # Get Timestamp + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + + # Choose Feature of Interest + # The Feature of Interest is the Specimen Sample if it is determined + # that this row has a specimen, otherwise it is Field Sample + foi = sample_specimen if has_specimen(row) else provider_record_id_occurrence + + # Retrieve Vocab or Create on the Fly + vocab = vocabs.sampling_protocol.HUMAN_OBSERVATION.iri # Always Human Observation + + # Sex Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("sex-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, foi)) + graph.add((uri, rdflib.SOSA.hasResult, sex_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["sex"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_SEX)) + graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check eventDateStart provided + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, 
a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + # Add comment to temporal entity + comment = "Date unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add method comment to node + method_comment = "Observation method unknown, 'human observation' used as proxy" + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal(method_comment))) + + def add_sex_value( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Sex Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if not row["sex"]: + return + + # Retrieve vocab for field + vocab = self.fields()["sex"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["sex"]) + + # Sex Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("sex-value"))) + graph.add((uri, rdflib.RDF.value, term)) + + def add_reproductive_condition_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + 
provider_record_id_occurrence: rdflib.URIRef, + sample_specimen: rdflib.URIRef, + reproductive_condition_value: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Reproductive Condition Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + sample_specimen (rdflib.URIRef): Sample Specimen associated with + this node + reproductive_condition_value (rdflib.URIRef): Reproductive + Condition Value associated with this node + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to temporal entity rdf. + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["reproductiveCondition"]: + return + + # Get Timestamp + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + + # Choose Feature of Interest + # The Feature of Interest is the Specimen Sample if it is determined + # that this row has a specimen, otherwise it is Field Sample + foi = sample_specimen if has_specimen(row) else provider_record_id_occurrence + + # Retrieve Vocab or Create on the Fly + vocab = vocabs.sampling_protocol.HUMAN_OBSERVATION.iri # Always Human Observation + + # Reproductive Condition Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("reproductiveCondition-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, foi)) + graph.add((uri, rdflib.SOSA.hasResult, reproductive_condition_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["reproductiveCondition"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_REPRODUCTIVE_CONDITION)) + graph.add((uri, rdflib.SOSA.usedProcedure, vocab)) + + # Declare 
temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check eventDateStart provided + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + # Add comment to temporal entity + comment = "Date unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add method comment to node + method_comment = "Observation method unknown, 'human observation' used as proxy" + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal(method_comment))) + + def add_reproductive_condition_value( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Reproductive Condition Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if not row["reproductiveCondition"]: + return + + # Retrieve vocab for field + vocab = self.fields()["reproductiveCondition"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, 
base_iri=base_iri).get(row["reproductiveCondition"]) + + # Reproductive Condition Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("reproductiveCondition-value"))) + graph.add((uri, rdflib.RDF.value, term)) + + def add_accepted_name_usage_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + scientific_name: rdflib.URIRef, + accepted_name_usage_value: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds Accepted Name Usage Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from. + dataset (rdflib.URIRef): Dataset this belongs to. + scientific_name (rdflib.URIRef): Scientific Name associated with + this node. + accepted_name_usage_value (rdflib.URIRef): Accepted Name Usage + Value associated with this node. + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to default temporal entity as rdf. + graph (rdflib.Graph): Graph to add to. 
+ """ + # Check Existence + if not row["acceptedNameUsage"]: + return + + # Get Timestamp + date_identified: models.temporal.Timestamp | None = row["dateIdentified"] or row["eventDateStart"] + + # Accepted Name Usage Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("acceptedNameUsage-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, scientific_name)) + graph.add((uri, rdflib.SOSA.hasResult, accepted_name_usage_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["acceptedNameUsage"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_TAXON)) + graph.add((uri, rdflib.SOSA.usedProcedure, CONCEPT_NAME_CHECK_METHOD)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check date supplied within template + if date_identified is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, date_identified.rdf_in_xsd, date_identified.to_rdf_literal())) + # Add comment to temporal entity + timestamp_used = ( + "dateIdentified" if row["dateIdentified"] else "eventDateStart" + ) # Determine which field was used + comment = f"Date unknown, template {timestamp_used} used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." 
+ graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + def add_accepted_name_usage_value( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Accepted Name Usage Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + dataset (rdflib.URIRef): Dataset this belongs to + row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["acceptedNameUsage"]: + return + + # Accepted Name Usage Value + graph.add((uri, a, utils.namespaces.TERN.Text)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("acceptedNameUsage-value"))) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDF.value, rdflib.Literal(row["acceptedNameUsage"]))) + graph.add((uri, utils.namespaces.TERN.featureType, CONCEPT_ACCEPTED_NAME_USAGE)) + + def add_sampling_sequencing( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + feature_of_interest: rdflib.URIRef, + sample_sequence: rdflib.URIRef, + site_id_geometry_map: dict[str, str] | None, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Sampling Sequencing to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + feature_of_interest (rdflib.URIRef): Feature of Interest associated + with this node + sample_sequence (rdflib.URIRef): Sample Sequence associated with + this node + site_id_geometry_map (dict[str, str] | None): Map of default geometry + string values for a given site id. + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to default temporal entity rdf. 
+ graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if not row["associatedSequences"]: + return + + # Extract values + latitude = row["decimalLatitude"] + longitude = row["decimalLongitude"] + geodetic_datum = row["geodeticDatum"] + site_id = row["siteID"] + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + + if latitude is not None and longitude is not None: + # Create geometry + geometry = models.spatial.Geometry( + raw=models.spatial.LatLong(latitude, longitude), + datum=geodetic_datum, + ) + + elif site_id_geometry_map is not None and (default_geometry := site_id_geometry_map.get(site_id)) is not None: + # Create geometry from wkt literal + geometry = models.spatial.Geometry.from_geosparql_wkt_literal(default_geometry) + + else: + # Should not be able to reach here if validated data provided, + # but if it does then node will be ommitted from graph. + return + + # Retrieve vocab for field + vocab = self.fields()["sequencingMethod"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["sequencingMethod"]) + + # Add to Graph + graph.add((uri, a, utils.namespaces.TERN.Sampling)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("sequencing-sampling"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, feature_of_interest)) + graph.add((uri, rdflib.SOSA.hasResult, sample_sequence)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Determine eventDateStart supplied + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + graph.add((uri, rdflib.SOSA.usedProcedure, 
term)) + # Add comment to temporal entity + comment = "Date unknown, template eventDateStart used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add geometry + geometry_node = rdflib.BNode() + graph.add((uri, utils.namespaces.GEO.hasGeometry, geometry_node)) + graph.add((geometry_node, a, utils.namespaces.GEO.Geometry)) + graph.add((geometry_node, utils.namespaces.GEO.asWKT, geometry.to_transformed_crs_rdf_literal())) + + self.add_geometry_supplied_as( + subj=uri, + pred=utils.namespaces.GEO.hasGeometry, + obj=geometry_node, + geom=geometry, + graph=graph, + ) + + # Check for coordinateUncertaintyInMeters + if row["coordinateUncertaintyInMeters"]: + # Add Spatial Accuracy + accuracy = rdflib.Literal(row["coordinateUncertaintyInMeters"], datatype=rdflib.XSD.double) + graph.add((uri, utils.namespaces.GEO.hasMetricSpatialAccuracy, accuracy)) + + # Add comment to geometry + spatial_comment = "Location unknown, location of field sampling used as proxy" + graph.add((geometry_node, rdflib.RDFS.comment, rdflib.Literal(spatial_comment))) + + def add_sample_sequence( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + feature_of_interest: rdflib.URIRef, + sampling_sequencing: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds Sample Sequence to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. 
+ row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + feature_of_interest (rdflib.URIRef): Feature of Interest associated + with this node + sampling_sequencing (rdflib.URIRef): Sampling Sequencing associated + with this node + graph (rdflib.Graph): Graph to add to + """ + # Check Existence + if not row["associatedSequences"]: + return + + # Add to Graph + graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) + graph.add((uri, a, utils.namespaces.TERN.Sample)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("sequence-sample"))) + graph.add((uri, rdflib.SOSA.isResultOf, sampling_sequencing)) + graph.add((uri, rdflib.SOSA.isSampleOf, feature_of_interest)) + graph.add((uri, utils.namespaces.TERN.featureType, CONCEPT_SEQUENCE)) + + # Loop Through Associated Sequences + for identifier in row["associatedSequences"]: + # Add Identifier + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(identifier))) + + def add_provider_determined_by( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds Determined By Provider to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. 
+ row (frictionless.Row): Row to retrieve data from + graph (rdflib.Graph): Graph to add to + """ + # Check for threatStatusDeterminedBy + if not row["threatStatusDeterminedBy"]: + return + + # Add to Graph + graph.add((uri, a, rdflib.PROV.Agent)) + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["threatStatusDeterminedBy"]))) + + def add_threat_status_observation( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + threat_status_value: rdflib.URIRef, + determined_by: rdflib.URIRef, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Threat Status Observation to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node. + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + threat_status_value (rdflib.URIRef): Threat Status Value associated + with this node + determined_by (rdflib.URIRef): Determined By Provider associated + with this node + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to default temporal entity as rdf. 
+ graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if not row["threatStatus"]: + return + + # Get Timestamp + # Prefer `threatStatusDateDetermined` > `dateIdentified` > `eventDateStart` (fallback) + date_determined: models.temporal.Timestamp | None = ( + row["threatStatusDateDetermined"] or row["dateIdentified"] or row["preparedDate"] or row["eventDateStart"] + ) + + # Retrieve vocab for field + vocab = self.fields()["threatStatusCheckProtocol"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row["threatStatusCheckProtocol"]) + + # Threat Status Observation + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("threatStatus-observation"))) + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, provider_record_id_occurrence)) + graph.add((uri, rdflib.SOSA.hasResult, threat_status_value)) + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(row["threatStatus"]))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_CONSERVATION_STATUS)) + graph.add((uri, rdflib.SOSA.usedProcedure, term)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check date provided within template + if date_determined is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, date_determined.rdf_in_xsd, date_determined.to_rdf_literal())) + # Check for threatStatusDeterminedBy + if row["threatStatusDeterminedBy"]: + # Add wasAssociatedWith + graph.add((uri, rdflib.PROV.wasAssociatedWith, determined_by)) + # Check for threatStatusDateDetermined + if not row["threatStatusDateDetermined"]: + # Determine Used Date Column + 
date_used = ( + "dateIdentified" + if row["dateIdentified"] + else "preparedDate" + if row["preparedDate"] + else "eventDateStart" + ) + # Add comment to temporal entity + comment = f"Date unknown, template {date_used} used as proxy" + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." + graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + def add_threat_status_value( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Threat Status Value to the Graph + + Args: + uri (rdflib.URIRef): URI to use for this node + row (frictionless.Row): Row to retrieve data from + dataset (rdflib.URIRef): Dataset this belongs to + graph (rdflib.Graph): Graph to add to + base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Check Existence + if not row["threatStatus"]: + return + + # Combine conservationAuthority and threatStatus + value = f"{row['conservationAuthority']}/{row['threatStatus']}" + + # Retrieve vocab for field + vocab = self.fields()["threatStatus"].get_flexible_vocab() + + # Retrieve term or Create on the Fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(value) + + # Threat Status Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(f"Conservation status = {row['threatStatus']}"))) + graph.add((uri, rdflib.RDF.value, term)) + + def add_conservation_authority_attribute( + self, + *, + uri: rdflib.URIRef | None, + conservation_authority: str 
| None, + conservation_authority_value: rdflib.URIRef | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds Conservation Authority Attribute to the Graph + + Args: + uri: URI to use for this node. + conservation_authority: conservationAuthority value from the CSV + conservation_authority_value: Conservation Authority Value associated with this node + dataset: Dataset this belongs to + graph: Graph to add to + """ + # Check Existence + if uri is None: + return + + # Conservation Authority Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_CONSERVATION_AUTHORITY)) + if conservation_authority: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(conservation_authority))) + if conservation_authority_value: + graph.add((uri, utils.namespaces.TERN.hasValue, conservation_authority_value)) + + def add_conservation_authority_value( + self, + *, + uri: rdflib.URIRef | None, + conservation_authority: str | None, + graph: rdflib.Graph, + ) -> None: + """Adds Conservation Authority Value to the Graph + + Args: + uri: URI to use for this node + conservation_authority: conservationAuthority value from the CSV + graph: Graph to add to + base_iri: Namespace used to construct IRIs + """ + # Check Existence + if uri is None: + return + + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + if conservation_authority: + # Construct Label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(conservation_authority))) + + # Retrieve vocab for field + vocab = self.fields()["conservationAuthority"].get_vocab() + # Retrieve term + term = vocab().get(conservation_authority) + # Conservation Authority Value + graph.add((uri, rdflib.RDF.value, term)) + + def add_conservation_authority_collection( + self, + *, + uri: rdflib.URIRef | None, + conservation_authority: str | None, + 
conservation_authority_attribute: rdflib.URIRef | None, + threat_status_observation: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a conservation authority Collection to the graph + + Args: + uri: The uri for the SampleCollection. + conservation_authority: conservationAuthority value from template. + conservation_authority_attribute: The uri for the attribute node. + threat_status_observation: The node that should be a member of the collection. + dataset: The uri for the dateset node. + graph: The graph. + """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if conservation_authority: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Occurrence Collection - Conservation Authority - {conservation_authority}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the threat status observation node + graph.add((uri, rdflib.SDO.member, threat_status_observation)) + # Add link to attribute + if conservation_authority_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, conservation_authority_attribute)) + + def add_organism_quantity_observation( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + row: frictionless.Row, + site_visit_id_temporal_map: dict[str, str] | None, + graph: rdflib.Graph, + ) -> None: + """Adds observation organism quantity to the graph. + + Args: + uri (rdflib.URIRef): URI to use for this node. + dataset (rdflib.URIRef): Dataset which data belongs. + provider_record_id_occurrence (rdflib.URIRef): Occurrence associated with this + node + row (frictionless.Row): Row to retrieve data from. + site_visit_id_temporal_map (dict[str, str] | None): Map of site visit + id to default temporal entity as rdf. + graph (rdflib.Graph): Graph to be modified. 
+ """ + # Extract values + event_date: models.temporal.Timestamp | None = row["eventDateStart"] + organism_qty = row["organismQuantity"] + organism_qty_type = row["organismQuantityType"] + + # Check if organism quantity values were provided + if not organism_qty or not organism_qty_type: + return + + # Attach node to sample field and dataset + graph.add((uri, rdflib.SOSA.hasFeatureOfInterest, provider_record_id_occurrence)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add type + graph.add((uri, a, utils.namespaces.TERN.Observation)) + graph.add((uri, rdflib.RDFS.comment, rdflib.Literal("organismQuantity-observation"))) + graph.add((uri, rdflib.SOSA.observedProperty, CONCEPT_ORGANISM_QUANTITY)) + + # Declare temporal entity to allow correct assignment typechecks + temporal_entity: rdflib.term.Node | None = None + # Check eventDateStart provided + if event_date is not None: + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.Instant)) + graph.add((temporal_entity, event_date.rdf_in_xsd, event_date.to_rdf_literal())) + # Add comment to temporal entity + graph.add( + ( + temporal_entity, + rdflib.RDFS.comment, + rdflib.Literal("Date unknown, template eventDateStart used as proxy"), + ) + ) + else: + # Use default rdf from site visit as temporal entity + temporal_entity = self.add_default_temporal_entity( + uri=uri, + site_visit_id_temporal_map=site_visit_id_temporal_map, + row=row, + graph=graph, + ) + # Add comment to temporal entity + if temporal_entity is not None: + comment = "Date unknown, site visit dates used as proxy." 
+ graph.add((temporal_entity, rdflib.RDFS.comment, rdflib.Literal(comment))) + + # Add Human observation as proxy for observation method + human_observation = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/ea1d6342-1901-4f88-8482-3111286ec157") + graph.add((uri, rdflib.SOSA.usedProcedure, human_observation)) + + # Add organism quantity and type values + graph.add((uri, rdflib.SOSA.hasSimpleResult, rdflib.Literal(f"{organism_qty} {organism_qty_type}"))) + + # Add method comment to node + graph.add( + ( + uri, + rdflib.RDFS.comment, + rdflib.Literal("Observation method unknown, 'human observation' used as proxy"), + ) + ) + + def add_organism_quantity_value( + self, + uri: rdflib.URIRef, + organism_qty_observation: rdflib.URIRef, + dataset: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds organism quantity value to graph. + + Args: + uri (rdflib.URIRef): URI to use for this node. + organism_qty_observation (rdflib.URIRef): Observation URI. + dataset (rdflib.URIRef): Dataset this is a part of. + row (frictionless.Row): Row to retrieve data from. + graph (rdflib.Graph): Graph to be modified. 
+ base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Extract values if any + organism_qty = row["organismQuantity"] + organism_qty_type = row["organismQuantityType"] + + # Check for values + if not (organism_qty and organism_qty_type): + return + + # Retrieve vocab for field + vocab = self.fields()["organismQuantityType"].get_flexible_vocab() + + # Get term or create on the fly + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(organism_qty_type) + + # Add to graph + graph.add((organism_qty_observation, rdflib.SOSA.hasResult, uri)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, a, utils.namespaces.TERN.Float)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal("organism-quantity"))) + graph.add((uri, utils.namespaces.TERN.unit, term)) + graph.add((uri, rdflib.RDF.value, rdflib.Literal(organism_qty, datatype=rdflib.XSD.float))) + + def add_site( + self, + uri: rdflib.URIRef | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds site to the graph. + + Args: + uri (rdflib.URIRef | None): URI to use if site provided else None. + dataset (rdflib.URIRef): The dataset which the data belongs. + graph (rdflib.URIRef): Graph to be modified. + """ + # Check site uri exists + if uri is None: + return + + # Add site information to graph + graph.add((uri, a, utils.namespaces.TERN.Site)) + graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.featureType, vocabs.site_type.SITE.iri)) + + def add_sensitivity_category_attribute( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + dataset: rdflib.URIRef, + sensitivity_category_value: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds Sensitivity Category Attribute to the Graph + + Args: + uri: URI to use for this node. 
+ row: Row to retrieve data from + dataset: Dataset this belongs to + sensitivity_category_value: Sensitivity Category Value associated with this node + graph: Graph to add to + """ + # Check Existence + if uri is None: + return + + simple_value = f"{row['sensitivityCategory']} - {row['sensitivityAuthority']}" + + # Sensitivity Category Attribute + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_SENSITIVITY_CATEGORY)) + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(simple_value))) + if sensitivity_category_value: + graph.add((uri, utils.namespaces.TERN.hasValue, sensitivity_category_value)) + + def add_sensitivity_category_value( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds Sensitivity Category Value to the Graph + + Args: + uri: URI to use for this node + row: Row to retrieve data from + dataset: Dataset this belongs to + graph: Graph to add to + base_iri: Namespace used to construct IRIs + """ + # Check Existence + if uri is None: + return + + # Retrieve vocab for field + vocab = self.fields()["sensitivityCategory"].get_flexible_vocab() + vocab_instance = vocab(graph=graph, source=dataset, base_iri=base_iri) + + # Set the scope note to use if a new term is created on the fly. + scope_note = f"Under the authority of {row['sensitivityAuthority']}" + if not isinstance(vocab_instance, utils.vocabs.FlexibleVocabulary): + raise RuntimeError("sensitiveCategory vocabulary is expected to be a FlexibleVocabulary") + vocab_instance.scope_note = rdflib.Literal(scope_note) + # This has to be done here, instead of at the Vocabulary definition, + # because the value is computed from another field (sensitivityAuthority). 
+ + # Retrieve term or Create on the Fly + term = vocab_instance.get(row["sensitivityCategory"]) + + # Construct Label + label = f"sensitivity category = {row['sensitivityCategory']}" + + # Conservation Authority Value + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(label))) + graph.add((uri, rdflib.RDF.value, term)) + + def add_sensitivity_category_collection( + self, + *, + uri: rdflib.URIRef | None, + sensitivity_category: str | None, + sensitivity_category_attribute: rdflib.URIRef | None, + provider_record_id_biodiversity_record: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a sensitivity category Collection to the graph + + Args: + uri: The uri for the Collection. + sensitivity_category: sensitivityCategory value from template. + sensitivity_category_attribute: The uri for the attribute node. + provider_record_id_biodiversity_record: The biodiversity record. + dataset: The uri for the dateset node. + graph: The graph. + """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if sensitivity_category: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Occurrence Collection - Sensitivity Category - {sensitivity_category}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # Add link to attribute + if sensitivity_category_attribute: + graph.add((uri, utils.namespaces.TERN.hasAttribute, sensitivity_category_attribute)) + # Add link to the biodiversity record + graph.add((uri, rdflib.SDO.member, provider_record_id_biodiversity_record)) + + def add_survey( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds the basics of the Survey node to the graph. + + The other properties for the node come from the survey metadata. 
+ + Args: + uri: The URI for the Survey node + dataset: The dataset URI + graph: The graph to update + """ + # Add type + graph.add((uri, a, utils.namespaces.TERN.Survey)) + # Add dataset link + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + def add_biodiversity_record( + self, + uri: rdflib.URIRef, + provider_record_id_datatype: rdflib.URIRef, + provider_record_id_occurrence: rdflib.URIRef, + dataset: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds biodiversity record node to graph. + + Args: + uri: Subject of the node. + provider_record_id_datatype: The datatype associated with + the provider record id. + provider_record_id_occurrence: Reference to the occurrence + of the row. + dataset: The uri for the dateset node. + row: Raw data for row. + graph: Graph to be modified. + """ + # Add class + graph.add((uri, a, utils.namespaces.ABIS.BiodiversityRecord)) + # Add identifier value literal + graph.add( + (uri, rdflib.SDO.identifier, rdflib.Literal(row["providerRecordID"], datatype=provider_record_id_datatype)) + ) + # Add about property + graph.add((uri, rdflib.SDO.about, provider_record_id_occurrence)) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + def add_occurrence( + self, + uri: rdflib.URIRef, + record_number_datatype: rdflib.URIRef | None, + owner_record_id_datatype: rdflib.URIRef | None, + other_catalog_numbers_datatype: rdflib.URIRef | None, + catalog_number_datatype: rdflib.URIRef | None, + provider_recorded_by: rdflib.URIRef | None, + survey: rdflib.URIRef, + site: rdflib.URIRef | None, + site_visit: rdflib.URIRef | None, + dataset: rdflib.URIRef, + site_id_geometry_map: dict[str, str] | None, + row: frictionless.Row, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds occurrence node to the graph. + + Args: + uri: Subject of the node. + record_number_datatype: Datatype associated with the recordNumber. 
+ owner_record_id_datatype: Datatype associated with the owner recordID. + other_catalog_numbers_datatype: Datatype associated with other catalog numbers. + catalog_number_datatype: Catalog number source datatype. + provider_recorded_by: Agent derived from the recordedBy field. + survey: Survey that the occurrence took place. + site: Designated site that occurrence happened. + site_visit: Visit associated with occurrence and site. + dataset: The uri for the dateset node. + site_id_geometry_map: Map for default geometry for a given siteID. + row: Raw data from the row. + graph: Graph to be modified. + base_iri: Namespace used to construct IRIs + """ + # Create geometry + # Extract values + latitude = row["decimalLatitude"] + longitude = row["decimalLongitude"] + geodetic_datum = row["geodeticDatum"] + site_id = row["siteID"] + + # Check to see if lat long provided + if latitude is not None and longitude is not None: + # Create geometry + geometry = models.spatial.Geometry( + raw=models.spatial.LatLong(latitude, longitude), + datum=geodetic_datum, + ) + + # If not then use default geometry map + elif site_id_geometry_map is not None and (default_geometry := site_id_geometry_map.get(site_id)) is not None: + # Create geometry from geosparql wkt literal + geometry = models.spatial.Geometry.from_geosparql_wkt_literal(default_geometry) + + else: + # Should not reach here since validated data provided, however if + # it does come to it the corresponding node will be omitted + return + + # Class + graph.add((uri, a, utils.namespaces.DWC.Occurrence)) + graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) + + # Add to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add identifiers + if record_number := row["recordNumber"]: + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(record_number, datatype=record_number_datatype))) + if owner_record_id := row["ownerRecordID"]: + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(owner_record_id, 
datatype=owner_record_id_datatype))) + for catalog_number in row["otherCatalogNumbers"] or []: + graph.add( + (uri, rdflib.SDO.identifier, rdflib.Literal(catalog_number, datatype=other_catalog_numbers_datatype)) + ) + + # Add feature type from vocab + kingdom_vocab = self.fields()["kingdom"].get_flexible_vocab("KINGDOM_OCCURRENCE") + graph.add( + ( + uri, + utils.namespaces.TERN.featureType, + kingdom_vocab(graph=graph, base_iri=base_iri).get(row["kingdom"]), + ) + ) + + # Add geometry + geometry_node = rdflib.BNode() + graph.add((uri, rdflib.SDO.spatial, geometry_node)) + graph.add((geometry_node, a, utils.namespaces.GEO.Geometry)) + graph.add((geometry_node, utils.namespaces.GEO.asWKT, geometry.to_transformed_crs_rdf_literal())) + + # Check for coordinateUncertaintyInMeters + accuracy: rdflib.Literal | None = None + if coordinate_uncertainty := row["coordinateUncertaintyInMeters"]: + # Add Spatial Accuracy + accuracy = rdflib.Literal(coordinate_uncertainty, datatype=rdflib.XSD.double) + graph.add((geometry_node, utils.namespaces.GEO.hasMetricSpatialAccuracy, accuracy)) + + # Add 'supplied as' geometry + self.add_geometry_supplied_as( + subj=uri, + pred=rdflib.SDO.spatial, + obj=geometry_node, + geom=geometry, + graph=graph, + spatial_accuracy=accuracy, + ) + + # Add temporal entity + event_date_start: models.temporal.Timestamp | None = row["eventDateStart"] + event_date_end: models.temporal.Timestamp | None = row["eventDateEnd"] + # Check any event dates provided + if event_date_start is not None or event_date_end is not None: + temporal_entity = rdflib.BNode() + graph.add((temporal_entity, a, rdflib.TIME.TemporalEntity)) + graph.add((uri, rdflib.SDO.temporal, temporal_entity)) + if event_date_start is not None: + start_instant = rdflib.BNode() + graph.add((start_instant, a, rdflib.TIME.Instant)) + graph.add((start_instant, event_date_start.rdf_in_xsd, event_date_start.to_rdf_literal())) + graph.add((temporal_entity, rdflib.TIME.hasBeginning, start_instant)) + if 
event_date_end is not None: + end_instant = rdflib.BNode() + graph.add((end_instant, a, rdflib.TIME.Instant)) + graph.add((end_instant, event_date_end.rdf_in_xsd, event_date_end.to_rdf_literal())) + graph.add((temporal_entity, rdflib.TIME.hasEnd, end_instant)) + + # Add procedure from vocab + protocol_vocab = self.fields()["samplingProtocol"].get_flexible_vocab() + graph.add( + ( + uri, + rdflib.SOSA.usedProcedure, + protocol_vocab(graph=graph, base_iri=base_iri).get(row["samplingProtocol"]), + ) + ) + + # Add location description if provided + if locality := row["locality"]: + graph.add((uri, utils.namespaces.TERN.locationDescription, rdflib.Literal(locality))) + + # Add associated with agents if provided + if provider_recorded_by is not None: + graph.add((uri, rdflib.PROV.wasAssociatedWith, provider_recorded_by)) + + # Check for catalogNumber + if row["catalogNumber"]: + # Add to Graph + graph.add( + ( + uri, + utils.namespaces.DWC.catalogNumber, + rdflib.Literal(row["catalogNumber"], datatype=catalog_number_datatype), + ) + ) + + # Check for collectionCode + if row["collectionCode"]: + # Add to Graph + graph.add((uri, utils.namespaces.DWC.collectionCode, rdflib.Literal(row["collectionCode"]))) + + # Add survey + graph.add((uri, rdflib.SDO.isPartOf, survey)) + + # Add site if provided + if site is not None: + graph.add((uri, rdflib.SOSA.isSampleOf, site)) + + # Add site visit if provided + if site_visit is not None: + graph.add((uri, utils.namespaces.TERN.hasSiteVisit, site_visit)) + + def add_site_visit( + self, + uri: rdflib.URIRef | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds the basics of the SiteVisit node to the graph. + + Only applicable when the occurrence has a siteVisitID. + The other properties for the node come from the site visit template. 
+ + Args: + uri: The URI for the Site visit node + dataset: The dataset URI + graph: The graph to update + """ + # Check site visit exists + if uri is None: + return + + # Add type + graph.add((uri, a, utils.namespaces.TERN.SiteVisit)) + # Add dataset link + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + +# Helper Functions +# These utility helper functions are specific to this template, and as such are +# defined here instead of in a common utilities module. +def has_specimen(row: frictionless.Row) -> bool: + """Determines whether a row has a specimen associated with it or not. + + This method is used when determining whether to add the specimen specific + `/sampling/specimen/x` and `/sample/specimen/x` nodes to the graph. + + Args: + row (frictionless.Row): Row to retrieve data from. + + Returns: + bool: Whether this row has a specimen associated with it. + """ + # Check Specimen Rules + if row["preparations"] or row["associatedSequences"]: + # If either of `preparations` or `associatedSequences` + # are provided, regardless of the value of `basisOfRecord` we can infer + # that there is a specimen associated with the row. + specimen = True + + elif ( + not row["basisOfRecord"] # Blank + or vocabs.basis_of_record.HUMAN_OBSERVATION.match(row["basisOfRecord"]) # HumanObservation + or vocabs.basis_of_record.OCCURRENCE.match(row["basisOfRecord"]) # Occurrence + ): + # Otherwise, if neither of `preparations` or + # `associatedSequences` were provided, and the `basisOfRecord` is + # either blank or one of "HumanObservation" or "Occurrence", then we + # cannot infer that there is a specimen associated with the row. + specimen = False + + else: + # Finally, neither of `preparations` or + # `associatedSequences` were provided, but the `basisOfRecord` is a + # value that implies that there is a specimen associated with the row. 
+ specimen = True + + # Return + return specimen + + +# Register Mapper +base.mapper.register_mapper(SurveyOccurrenceMapper) diff --git a/abis_mapping/templates/survey_occurrence_data_v3/metadata.json b/abis_mapping/templates/survey_occurrence_data_v3/metadata.json new file mode 100644 index 00000000..2eb41780 --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/metadata.json @@ -0,0 +1,13 @@ +{ + "name": "survey_occurrence_data", + "label": "Systematic Survey Occurrence Data Template", + "version": "3.0.0", + "description": "A template to translate some Darwin Core fields", + "biodiversity_type": "Systematic Survey Species Occurrences", + "spatial_type": "Point", + "file_type": "CSV", + "sampling_type": "systematic survey", + "template_url": "https://raw.githubusercontent.com/gaiaresources/abis-mapping/main/abis_mapping/templates/survey_occurrence_data_v3/survey_occurrence_data.csv", + "schema_url": "https://raw.githubusercontent.com/gaiaresources/abis-mapping/main/abis_mapping/templates/survey_occurrence_data_v3/schema.json", + "template_lifecycle_status": "beta" +} diff --git a/abis_mapping/templates/survey_occurrence_data_v3/schema.json b/abis_mapping/templates/survey_occurrence_data_v3/schema.json new file mode 100644 index 00000000..97cc506a --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/schema.json @@ -0,0 +1,770 @@ +{ + "fields": [ + { + "name": "providerRecordID", + "title": "Provider Record ID", + "description": "Unique (within provider) identifier for the record.", + "example": "8022FSJMJ079c5cf", + "type": "string", + "format": "default", + "constraints": { + "required": true, + "unique": true + } + }, + { + "name": "providerRecordIDSource", + "title": "Provider Record ID Source", + "description": "Person or Organisation that generated the providerRecordID.", + "example": "Western Australian Biodiversity Information Office", + "type": "string", + "format": "default", + "constraints": { + "required": true + } + }, + 
{ + "name": "locality", + "title": "Locality", + "description": "The specific description of the place.", + "example": "Cowaramup Bay Road", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:locality", + "constraints": { + "required": false + } + }, + { + "name": "decimalLatitude", + "title": "Decimal Latitude", + "description": "The geographic latitude (in decimal degrees, using the spatial reference system given in geodeticDatum) of the geographic centre of a Location. Positive values are north of the Equator, negative values are south of it. Valid coordinate ranges for the BDR system are within and inclusive of -90 to 0.", + "example": "-33.812314", + "type": "number", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:decimalLatitude", + "constraints": { + "required": true, + "minimum": -90, + "maximum": 0 + } + }, + { + "name": "decimalLongitude", + "title": "Decimal Longitude", + "description": "The geographic longitude (in decimal degrees, using the spatial reference system given in geodeticDatum) of the geographic centre of a Location. Positive values are east of the Greenwich Meridian, negative values are west of it. 
Valid coordinate ranges for the BDR system are within and inclusive of 0 to 180.", + "example": "115.231512", + "type": "number", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:decimalLongitude", + "constraints": { + "required": true, + "minimum": 0, + "maximum": 180 + } + }, + { + "name": "geodeticDatum", + "title": "Geodetic Datum", + "description": "The acronym for the ellipsoid, geodetic datum, or spatial reference system (SRS) upon which the geographic (non-projected) coordinates given in decimalLatitude and decimalLongitude as based.", + "example": "WGS84", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:geodeticDatum", + "constraints": { + "required": true, + "enum": [ + "AGD66", + "EPSG:4202", + "AGD84", + "EPSG:4203", + "GDA2020", + "EPSG:7844", + "GDA94", + "EPSG:4283", + "WGS84", + "EPSG:4326" + ] + }, + "vocabularies": [ + "GEODETIC_DATUM" + ] + }, + { + "name": "coordinateUncertaintyInMeters", + "title": "Coordinate Uncertainty In Meters", + "description": "The horizontal distance (in metres) from the given decimalLatitude and decimalLongitude describing the smallest circle containing the whole of the Location. Leave the value empty if the uncertainty is unknown, cannot be estimated, or is not applicable (because there are no coordinates). Zero is not a valid value for this term.", + "example": "50.0", + "type": "number", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:coordinateUncertaintyInMeters", + "constraints": { + "required": false, + "minimum": 0 + } + }, + { + "name": "dataGeneralizations", + "title": "Data Generalizations", + "description": "Actions taken to make the shared data less specific or complete than in its original form, due to restrictions around identifying locations of particular species. 
Suggests that alternative data of higher quality may be available on request.", + "example": "Coordinates rounded to the nearest 10 km for conservation concern", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwciri:dataGeneralizations", + "constraints": { + "required": false + } + }, + { + "name": "eventDateStart", + "title": "Event Date Start", + "description": "The start date (with precision of year (YYYY), month year (YYYY-MM) or date in the following formats DD/MM/YYYY or YYYY-MM-DD are accepted) or date-time without timezone (in ISO 8601 format for example 2021-07-11T06:23:00) or date-time with timezone (in ISO 8601 format for example 2022-05-20T06:23:00+08:00) of the period during which a species occurrence was observed. For occurrences, this is the date-time when the event was recorded. Not suitable for a time in a geological context.", + "example": "2019-09-23T14:03+08:00", + "type": "timestamp", + "format": "default", + "constraints": { + "required": true + } + }, + { + "name": "eventDateEnd", + "title": "Event Date End", + "description": "The end date (with precision of year (YYYY), month year (YYYY-MM) or date in the following formats DD/MM/YYYY or YYYY-MM-DD are accepted) or date-time without timezone (in ISO 8601 format for example 2021-07-11T06:23:00) or date-time with timezone (in ISO 8601 format for example 2022-05-20T06:23:00+08:00) of the period during which a species occurrence was observed. For occurrences, this is the date-time when the event was recorded. Not suitable for a time in a geological context.", + "example": "2019-09-24", + "type": "timestamp", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "samplingProtocol", + "title": "Sampling Protocol", + "description": "The sampling protocol is the method used to sample the locality to determine the presence (or absence) of the taxon referred to in this record at the indicated time. 
This may be a collecting method or a method to observe an organism without collection.\nRecommended best practice is to describe a species occurrence with no more than one sampling protocol. In the case of a summary, in which a specific protocol can not be attributed to specific species occurrences, the recommended best practice is to repeat the property for each IRI that denotes a different sampling protocol that applies to the occurrence.", + "example": "Human Observation", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwciri:samplingProtocol", + "constraints": { + "required": false + }, + "vocabularies": [ + "SAMPLING_PROTOCOL" + ] + }, + { + "name": "basisOfRecord", + "title": "Basis Of Record", + "description": "The specific nature of the data record.", + "example": "Preserved Specimen", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:basisOfRecord", + "constraints": { + "required": false + }, + "vocabularies": [ + "BASIS_OF_RECORD" + ] + }, + { + "name": "recordedBy", + "title": "Recorded By", + "description": "A person, group, or organisation responsible for recording the original Occurrence.", + "example": "Stream Environment and Water Pty Ltd", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwciri:recordedBy", + "constraints": { + "required": false + } + }, + { + "name": "recordNumber", + "title": "Record Number", + "description": "An identifier given to the Occurrence at the time it was recorded. 
Often serves as a link between field notes and an Occurrence record, such as a specimen collector's number.", + "example": "PE:12:8832", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/recordNumber", + "constraints": { + "required": false + } + }, + { + "name": "occurrenceStatus", + "title": "Occurrence Status", + "description": "A statement about the presence or absence of a Taxon at a Location.", + "example": "Present", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:occurrenceStatus", + "constraints": { + "required": false + }, + "vocabularies": [ + "OCCURRENCE_STATUS" + ] + }, + { + "name": "habitat", + "title": "Habitat", + "description": "A category or description of the habitat in which the event occurred.", + "example": "Closed forest of Melaleuca lanceolata. White, grey or brown sand, sandy loam.", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/habitat", + "constraints": { + "required": false + }, + "vocabularies": [ + "TARGET_HABITAT_SCOPE" + ] + }, + { + "name": "establishmentMeans", + "title": "Establishment Means", + "description": "Statement about whether an organism or organisms have been introduced to a given place and time through the direct or indirect activity of modern humans.", + "example": "Native", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:establishmentMeans", + "constraints": { + "required": false + }, + "vocabularies": [ + "ESTABLISHMENT_MEANS" + ] + }, + { + "name": "organismRemarks", + "title": "Organism Remarks", + "description": "Comments or notes about the Organism instance.", + "example": "Dried out leaf tips.", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/organismRemarks", + "constraints": { + "required": false + } + }, + { + "name": "individualCount", + "title": "Individual Count", + "description": "The number of individuals present at the time of 
the Occurrence. 0 = none, no value = the specific number was not recorded.", + "example": "26", + "type": "integer", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:individualCount", + "constraints": { + "required": false + } + }, + { + "name": "organismQuantity", + "title": "Organism Quantity", + "description": "A number or enumeration value for the quantity of organisms.", + "example": "12.5", + "type": "number", + "format": "default", + "url": "https://dwc.tdwg.org/list/#dwc_organismQuantity", + "constraints": { + "required": false + } + }, + { + "name": "organismQuantityType", + "title": "Organism Quantity Type", + "description": "The type of quantification system used for the quantity organisms.", + "example": "% biomass", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/list/#dwc_organismQuantityType", + "constraints": { + "required": false + }, + "vocabularies": [ + "ORGANISM_QUANTITY_TYPE" + ] + }, + { + "name": "lifeStage", + "title": "Life Stage", + "description": "The age class or life stage of the Organism(s) at the time the Occurrence was recorded.", + "example": "adult", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/lifeStage", + "constraints": { + "required": false + }, + "vocabularies": [ + "LIFE_STAGE" + ] + }, + { + "name": "sex", + "title": "Sex", + "description": "The sex of the biological individual(s) represented in the Occurrence.", + "example": "Unspecified", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwciri:sex", + "constraints": { + "required": false + }, + "vocabularies": [ + "SEX" + ] + }, + { + "name": "reproductiveCondition", + "title": "Reproductive Condition", + "description": "The reproductive condition of the biological individual(s) represented in the Occurrence.", + "example": "No breeding evident", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:reproductiveCondition", + 
"constraints": { + "required": false + }, + "vocabularies": [ + "REPRODUCTIVE_CONDITION" + ] + }, + { + "name": "ownerRecordID", + "title": "Owner Record ID", + "description": "Identifier given to the occurrence by the owner of the data. Populate this field if the data owner is different to the data provider. Unique (within data owner) identifier for the record.", + "example": "12345NT521mc5h", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "ownerRecordIDSource", + "title": "Owner Record ID Source", + "description": "Person or Organisation that generated the ownerRecordID. For organisations registered with the BDR, this field should contain the BDR registrationID. For all others, please provide the name of Person or Organisation who owns the data.", + "example": "WAM", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "collectionCode", + "title": "Collection Code", + "description": "The name, acronym, code, or initialism identifying the collection or data set from which the record was derived. It is associated with the catalogNumber.", + "example": "ARACH", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:collectionCode", + "constraints": { + "required": false + } + }, + { + "name": "catalogNumber", + "title": "Catalog Number", + "description": "An identifier (preferably unique) for the record within the data set or collection.", + "example": "145732, 145732a, 2008.1334, R-4313", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/catalogNumber", + "constraints": { + "required": false + } + }, + { + "name": "catalogNumberSource", + "title": "Catalog Number Source", + "description": "Organisation that generated the catalogNumber. In the BDR context, this is likely to be a collecting institution where a specimen or material sample is located. 
For organisations registered with the BDR, this field should contain the BDR registrationID. For all others, please provide the name of Person or Organisation.", + "example": "Western Australian Museum", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "otherCatalogNumbers", + "title": "Other Catalog Numbers", + "description": "A list (concatenated and separated with a space vertical bar space ( | )) of previous or alternate fully qualified catalog numbers or other human-used identifiers for the same Occurrence, whether in the current or any other data set or collection.", + "example": "BHP2012-7521 | M12378", + "type": "list", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/otherCatalogNumbers", + "constraints": { + "required": false + } + }, + { + "name": "otherCatalogNumbersSource", + "title": "Other Catalog Numbers Source", + "description": "Organisation that generated the otherCatalogNumbers. For organisations registered with the BDR, this field should contain the BDR registrationID. 
For all others, please provide the name of Person or Organisation.", + "example": "University of Western Australia", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "preparations", + "title": "Preparations", + "description": "A list (concatenated and separated with a space vertical bar space ( | )) of preparations and preservation methods for a specimen.", + "example": "alcohol", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/preparations", + "constraints": { + "required": false + }, + "vocabularies": [ + "PREPARATIONS" + ] + }, + { + "name": "preparedDate", + "title": "Prepared Date", + "description": "The date (with precision of year (YYYY), month year (YYYY-MM) or date in the following formats DD/MM/YYYY or YYYY-MM-DD are accepted) or date-time without timezone (in ISO 8601 format for example 2021-07-11T11:23:00) or date-time with timezone(in ISO 8601 format for example 2022-05-20T06:23:00+08:00) representing the date or date-time the specimen was prepared.", + "example": "2019-09-24", + "type": "timestamp", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "associatedSequences", + "title": "Associated Sequences", + "description": "A list (concatenated and separated with a space vertical bar space ( | )) of identifiers (publication, global unique identifier, URI) of genetic sequence information associated with the Occurrence.", + "example": "https://www.ncbi.nlm.nih.gov/nuccore/MH040669.1 | https://www.ncbi.nlm.nih.gov/nuccore/MH040616.1", + "type": "list", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/associatedSequences", + "constraints": { + "required": false + } + }, + { + "name": "sequencingMethod", + "title": "Sequencing Method", + "description": "The method used to obtain sequence data for example DNA, RNA, or protein from the sample.", + "example": "Sanger-dideoxy-sequencing", + "type": "string", + "format": 
"default", + "constraints": { + "required": false + }, + "vocabularies": [ + "SEQUENCING_METHOD" + ] + }, + { + "name": "verbatimIdentification", + "title": "Verbatim Identification", + "description": "A string representing the taxonomic identification as it appeared in the original record. This term is meant to allow the capture of an unaltered original identification/determination, including identification qualifiers, hybrid formulas, uncertainties, etc. This term is meant to be used in addition to scientificName (and identificationQualifier etc.), not instead of it.", + "example": "Caladenia ?excelsa", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:verbatimIdentification", + "constraints": { + "required": false + } + }, + { + "name": "dateIdentified", + "title": "Date Identified", + "description": "The date (with precision of year (YYYY), month year (YYYY-MM) or date in the following formats DD/MM/YYYY or YYYY-MM-DD are accepted) or date-time without timezone (in ISO 8601 format for example 2021-07-11T11:23:00) or date-time with timezone(in ISO 8601 format for example 2022-05-20T06:23:00+08:00) on which the subject was determined as representing the Taxon.", + "example": "2019-09-24", + "type": "timestamp", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/dateIdentified", + "constraints": { + "required": false + } + }, + { + "name": "identifiedBy", + "title": "Identified By", + "description": "Group of names, organisations who assigned the Taxon to the subject. For multiple names, use the pipe separator ( | ).", + "example": "J. 
Doe | WAM", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/identifiedBy", + "constraints": { + "required": false + } + }, + { + "name": "identificationMethod", + "title": "Identification Method", + "description": "Method used to associate the organism with the scientificName label.", + "example": "DNA", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "IDENTIFICATION_METHOD" + ] + }, + { + "name": "scientificName", + "title": "Scientific Name", + "description": "The full scientific name, with authorship and date information if known. When forming part of an Identification, this should be the name in lowest level taxonomic rank that can be determined. This term should not contain identification qualifications, which should instead be supplied in the identificationQualifier column.\nNOTE: Phrase names such as Rhagodia sp. Hamersley (M.Trudgen 17794) are permitted in the scientificName field where those are in use.", + "example": "Caladenia excelsa", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/scientificName", + "constraints": { + "required": true + } + }, + { + "name": "identificationQualifier", + "title": "Identification Qualifier", + "description": "A brief phrase or a standard term (\"cf.\", \"aff.\") to express the determiner's doubts about the Identification.", + "example": "Species incerta", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:identificationQualifier", + "constraints": { + "required": false + }, + "vocabularies": [ + "IDENTIFICATION_QUALIFIER" + ] + }, + { + "name": "identificationRemarks", + "title": "Identification Remarks", + "description": "Comments or notes about the Identification.", + "example": "DNA evidence may indicate a new species. 
Further analysis required.", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/identificationRemarks", + "constraints": { + "required": false + } + }, + { + "name": "acceptedNameUsage", + "title": "Accepted Name Usage", + "description": "The full name, with authorship and date information if known, of the currently valid (zoological) or accepted (botanical) taxon.", + "example": "Occiperipatoides gilesii (Spencer, 1909)", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:acceptedNameUsage", + "constraints": { + "required": false + } + }, + { + "name": "kingdom", + "title": "Kingdom", + "description": "The full scientific name of the kingdom in which the taxon is classified.", + "example": "Plantae", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:kingdom", + "constraints": { + "required": true + }, + "vocabularies": [ + "KINGDOM_OCCURRENCE", + "KINGDOM_SPECIMEN" + ] + }, + { + "name": "taxonRank", + "title": "Taxon Rank", + "description": "The taxonomic rank of the most specific name in the scientificName.", + "example": "Species", + "type": "string", + "format": "default", + "url": "http://rs.tdwg.org/dwc/terms/verbatimTaxonRank", + "constraints": { + "required": false + }, + "vocabularies": [ + "TAXON_RANK" + ] + }, + { + "name": "threatStatus", + "title": "Threat Status", + "description": "The conservation status (or code) assigned to an organism that is recognised in conjunction with a specific authority.", + "example": "EN", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "THREAT_STATUS" + ] + }, + { + "name": "conservationAuthority", + "title": "Conservation Authority", + "description": "The authority under which an organism is recognised to have a specific conservation status applied.", + "example": "EPBC, WA", + "type": "string", + "format": "default", + "constraints": { + "required": false, + 
"enum": [ + "ACT", + "AUSTRALIAN CAPITAL TERRITORY", + "Australian Capital Territory", + "EPBC", + "ENVIRONMENT PROTECTION AND BIODIVERSITY CONSERVATION", + "Environment Protection And Biodiversity Conservation", + "NSW", + "NEW SOUTH WALES", + "New South Wales", + "NT", + "NORTHERN TERRITORY", + "Northern Territory", + "QLD", + "QUEENSLAND", + "Queensland", + "SA", + "SOUTH AUSTRALIA", + "South Australia", + "TAS", + "TASMANIA", + "Tasmania", + "VIC", + "VICTORIA", + "Victoria", + "WA", + "WESTERN AUSTRALIA", + "Western Australia" + ] + }, + "vocabularies": [ + "CONSERVATION_AUTHORITY" + ] + }, + { + "name": "threatStatusCheckProtocol", + "title": "Threat Status Check Protocol", + "description": "The method used to determine if the organism is listed under the relevant jurisdictional threatened species list.", + "example": "Species name check of the Department of Climate Change, Energy, the Environment and Water’s Species Profile and Threat Database http://www.environment.gov.au/cgi-bin/sprat/public/sprat.pl", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "CHECK_PROTOCOL" + ] + }, + { + "name": "threatStatusDateDetermined", + "title": "Threat Status Date Determined", + "description": "The date (with precision of year (YYYY), month year (YYYY-MM) or date in the following formats DD/MM/YYYY or YYYY-MM-DD are accepted) or date-time without timezone (in ISO 8601 format for example 2021-07-11T11:23:00) or date-time with timezone(in ISO 8601 format for example 2022-05-20T06:23:00+08:00) on which this record of this organism was assigned to the nominated threatStatus and conservationAuthority", + "example": "30/08/2022", + "type": "timestamp", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "threatStatusDeterminedBy", + "title": "Threat Status Determined By", + "description": "The person and/organisation responsible for appending the threatStatus and conservationAuthority 
to this organism’s occurrence record.", + "example": "WA-BIO", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "sensitivityCategory", + "title": "Sensitivity Category", + "description": "A category that defines a type of data sensitivity or restriction.", + "example": "Category 2", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "SENSITIVITY_CATEGORY" + ] + }, + { + "name": "sensitivityAuthority", + "title": "Sensitivity Authority", + "description": "The authority under which a data sensitivity or restriction has been applied.", + "example": "Department of Environment, Climate Change and Water NSW", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "SENSITIVITY_AUTHORITY" + ] + }, + { + "name": "surveyID", + "title": "Survey ID", + "description": "The identifier of the Survey that the occurrence comes from. This field should be completed if it is ambiguous as to which survey the occurrence belongs to.", + "example": "AR220-01", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteID", + "title": "Site ID", + "description": "Corresponds to a unique site identifier, provided within accompanying survey_site_data.csv template.", + "example": "P1", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteVisitID", + "title": "Site Visit ID", + "description": "The identifier of the site visit that the occurrence comes from. 
This field should be completed if actions taken during a site visit resulted in this occurrence record.", + "example": "CPXEI0000001", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + } + ], + "foreignKeys": [ + { + "fields": "siteID", + "reference": { + "resource": "survey_site_data", + "fields": "siteID" + } + }, + { + "fields": "siteVisitID", + "reference": { + "resource": "survey_site_visit_data", + "fields": "siteVisitID" + } + } + ] +} diff --git a/abis_mapping/templates/survey_occurrence_data_v3/survey_occurrence_data.csv b/abis_mapping/templates/survey_occurrence_data_v3/survey_occurrence_data.csv new file mode 100644 index 00000000..815a40c6 --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/survey_occurrence_data.csv @@ -0,0 +1 @@ +providerRecordID,providerRecordIDSource,locality,decimalLatitude,decimalLongitude,geodeticDatum,coordinateUncertaintyInMeters,dataGeneralizations,eventDateStart,eventDateEnd,samplingProtocol,basisOfRecord,recordedBy,recordNumber,occurrenceStatus,habitat,establishmentMeans,organismRemarks,individualCount,organismQuantity,organismQuantityType,lifeStage,sex,reproductiveCondition,ownerRecordID,ownerRecordIDSource,collectionCode,catalogNumber,catalogNumberSource,otherCatalogNumbers,otherCatalogNumbersSource,preparations,preparedDate,associatedSequences,sequencingMethod,verbatimIdentification,dateIdentified,identifiedBy,identificationMethod,scientificName,identificationQualifier,identificationRemarks,acceptedNameUsage,kingdom,taxonRank,threatStatus,conservationAuthority,threatStatusCheckProtocol,threatStatusDateDetermined,threatStatusDeterminedBy,sensitivityCategory,sensitivityAuthority,surveyID,siteID,siteVisitID diff --git a/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md b/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md new file mode 100644 index 00000000..985c96f7 --- /dev/null +++ 
b/abis_mapping/templates/survey_occurrence_data_v3/templates/instructions.md @@ -0,0 +1,181 @@ +{% extends "BASE_TEMPLATE base.md" %} +{% block body %} +# SYSTEMATIC SURVEY OCCURRENCES DATA TEMPLATE INSTRUCTIONS + +## Intended Usage +This Systematic Survey Occurrences Data template is used to record occurrence data; +that is, the presence or absence of an organism +at a particular site locality at a point in time. + +This Systematic Survey Occurrences Data template **must be used in combination** with the +Systematic Survey Metadata template, or with the Systematic Survey Metadata and the Systematic Survey Sites Data +template, or with the Systematic Survey Metadata, the Systematic Survey Site Data and the Systematic Survey Site Visit +Data template. + +Templates have been provided to facilitate integration of your data into the Biodiversity +Data Repository database. Not all types of data have been catered for in the available +templates at this stage; therefore, if you are unable to find a suitable template, please +contact to make us aware of your data needs. + +## Data Validation Requirements: +For data validation, you will need your data file to: + +- be in the correct **file format,** +- have **fields that match the template downloaded** (do not remove, or + change the order of fields), +- have extant values for **mandatory fields** (see Table 1), +- comply with all **data value constraints**; for example the geographic coordinates are + consistent with a [geodeticDatum](#geodeticDatum-vocabularies) type of the ***{{values.geodetic_datum_count}}*** available + options, and +- align with existing controlled [vocabularies](#appendix-i-vocabulary-list) wherever possible (this is mandatory + for geodeticDatum), but new terms may be submitted for consideration and will not cause a + validation error. + +Additional fields may be added **after the templated fields** (noting that the data type +is not assumed and values will be encoded as strings). 
+ +### FILE FORMAT +- The systematic survey occurrence data template is a [UTF-8](#appendix-iii-utf-8) encoded csv (that is, not Microsoft + Excel Spreadsheets). Be sure to save this file with your data as a .csv (UTF-8): +
`[MS Excel: Save As > More options > Tools > Web options > Save this document as > + Unicode (UTF-8)]`
+ otherwise it will not pass the csv validation step upon upload. +- **Do not include empty rows**. + +### FILE NAME + +When making a manual submission to the Biodiversity Data Repository, +the file name must include the version number +of this biodiversity data template (`v{{ metadata.version }}`). +The following format is an example of a valid file name: + +`data_description-v{{ metadata.version }}-additional_description.csv` + +where: + +* `data_description`: A short description of the data (e.g. `survey_occ`, `test_data`). +* `v{{ metadata.version }}`: The version number of this template. +* `additional_description`: (Optional) Additional description of the data, if needed (e.g. `test_data`). +* `.csv`: Ensure the file name ends with `.csv`. + +For example, `survey_occ-v{{ metadata.version }}-test_data.csv` or `test_data-v{{ metadata.version }}.csv` + +### FILE SIZE +MS Excel imposes a limit of 1,048,576 rows on a spreadsheet, limiting a CSV file to the +header row followed by 1,048,575 occurrences. Furthermore, MS Excel has a 32,767-character +limit on individual cells in a spreadsheet. These limits may be overcome by using or +editing CSV files with other software. + +Larger datasets may be more readily ingested using the API interface. Please contact + to make us aware of your data needs. + +## TEMPLATE FIELDS +The template contains the field names in the top row. Table 1 will assist you in transferring +your data to the template by providing guidance on: + +- **Field name** in the template (and an external link to the [Darwin Core standard](https://dwc.tdwg.org/terms/) + for that field where relevant); +- **Description** of the field; +- **Required** i.e. 
whether the field is **mandatory, +conditionally mandatory, or optional**; +- **Format** (datatype) required for the data values for example text (string), number + (integer, float), or date; +- **Example** of an entry or entries for that field; and +- **[Vocabulary links](#appendix-i-vocabulary-list)** within this document (for example pick list values) where + relevant. The fields that have suggested values options for the fields in Table 1 are + listed in Table 2 in alphabetical order of the field name. + +### ADDITIONAL FIELDS +Data that does not match the existing template fields may be added as +additional columns in the CSV files after the templated fields. +For example: `eventRemarks`, `associatedTaxa`, `pathway`. + +Table 1: Systematic Survey Occurrence data template fields with descriptions, conditions, +datatype format, and examples. + +{{tables.fields}} + +## CHANGELOG + +No changes from Systematic Survey Occurrence Data Template v2.0.0 + +## APPENDICES +### APPENDIX-I: Vocabulary List +With the exception of `geodeticDatum`, data validation does not require fields to adhere to the +vocabularies specified for the various vocabularied fields. These vocabularies are merely provided as a +means of assistance in developing a consistent language within the database. New terms may be added +to more appropriately describe your data that goes beyond the current list. Table 2 provides some +suggested values from existing sources such as: [Biodiversity Information Standard (TDWG)](https://dwc.tdwg.org/), +[EPSG.io Coordinate systems worldwide](https://epsg.io/), the [Global Biodiversity Information +System](https://rs.gbif.org/), and [Open Nomenclature in the biodiversity +era](https://doi.org/10.1111/2041-210X.12594). + +Table 2: Suggested values for the controlled vocabulary fields in the template. Each term has +a preferred label with a definition to aid understanding of its meaning. For some terms, alternative +labels with similar semantics are provided. 
Note: `geodeticDatum` value +**must** come from one of five options in this table. + + + +{{tables.vocabularies}} + + +Table 2b: Suggested values for conditionally mandatory values for the `threatStatus` and +`conservationAuthority` fields in the template. State and Territory `conservationAuthority` +spelt out as words are also valid. For some `threatStatus` terms, alternative labels are provided +that are also valid for that `conservationAuthority`. + +{{tables.threat_status}} + +### APPENDIX-II: Timestamp +Following date and date-time formats are acceptable within the timestamp: + +| TYPE | FORMAT | +| --- |-------------------------------------------------------------------------------------------------------------------------------------| +| **xsd:dateTimeStamp with timezone** | yyyy-mm-ddThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00) OR
yyyy-mm-ddThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00) OR
yyyy-mm-ddThh:mmTZD (eg 1997-07-16T19:20+01:00)| +| **xsd:dateTime** | yyyy-mm-ddThh:mm:ss.s (eg 1997-07-16T19:20:30.45) OR
yyyy-mm-ddThh:mm:ss (eg 1997-07-16T19:20:30) OR
yyyy-mm-ddThh:mm (eg 1997-07-16T19:20) | +| **xsd:Date** | dd/mm/yyyy OR
d/m/yyyy OR
yyyy-mm-dd OR
yyyy-m-d | +| **xsd:gYearMonth** | mm/yyyy OR
m/yyyy OR
yyyy-mm | +| **xsd:gYear** | yyyy | + +Where
+  `yyyy`: four-digit year
+  `mm`: two-digit month (01=January, etc.)
+  `dd`: two-digit day of month (01 through 31)
+  `hh`: two digits of hour (00 through 23) (am/pm NOT allowed)
+  `mm`: two digits of minute (00 through 59)
+  `ss`: two digits of second (00 through 59)
+  `s`: one or more digits representing a decimal fraction of a second +  `TZD`: time zone designator (Z or +hh:mm or -hh:mm) + + +### APPENDIX-III: UTF-8 +UTF-8 encoding is considered a best practice for handling character encoding, especially in +the context of web development, data exchange, and modern software systems. UTF-8 +(Unicode Transformation Format, 8-bit) is a variable-width character encoding capable of +encoding all possible characters (code points) in Unicode.
+Here are some reasons why UTF-8 is recommended: +- **Universal Character Support:** UTF-8 can represent almost all characters from all writing + systems in use today. This includes characters from various languages, mathematical symbols, + and other special characters. +- **Backward Compatibility:** UTF-8 is backward compatible with ASCII (American + Standard Code for Information Interchange). The first 128 characters in UTF-8 are + identical to ASCII, making it easy to work with systems that use ASCII. +- **Efficiency:** UTF-8 is space-efficient for Latin-script characters (common in English + and many other languages). It uses one byte for ASCII characters and up to four + bytes for other characters. This variable-length encoding minimises storage and + bandwidth requirements. +- **Web Standards:** UTF-8 is the dominant character encoding for web content. It is + widely supported by browsers, servers, and web-related technologies. +- **Globalisation:** As software applications become more globalised, supporting a wide + range of languages and scripts becomes crucial. UTF-8 is well-suited for + internationalisation and multilingual support. +- **Compatibility with Modern Systems:** UTF-8 is the default encoding for many + programming languages, databases, and operating systems. Choosing UTF-8 helps + ensure compatibility across different platforms and technologies. + +When working with text data, UTF-8 encoding is recommended to avoid issues related to character +representation and ensure that a diverse set of characters and languages is supported. + +For assistance, please contact: +{% endblock %} diff --git a/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl b/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl new file mode 100644 index 00000000..58e12772 --- /dev/null +++ b/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl @@ -0,0 +1,275 @@ +@prefix abis: . +@prefix bdrsh: . +@prefix dwc: . 
+@prefix geo: . +@prefix prov: . +@prefix rdfs: . +@prefix schema: . +@prefix sh: . +@prefix skos: . +@prefix sosa: . +@prefix tern: . +@prefix time: . +@prefix xsd: . + +bdrsh:DatatypeShape + a rdfs:Class, sh:NodeShape ; + sh:targetClass rdfs:Datatype ; + sh:xone ( + bdrsh:RecordIDDatatypeShape + bdrsh:RecordNumberDatatypeShape + bdrsh:CatalogNumberDatatypeShape + bdrsh:DatasetDatatypeShape # Specified in the base mapper + ) . + +bdrsh:CatalogNumberDatatypeShape + a sh:NodeShape ; + sh:property [ + sh:path skos:prefLabel ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:pattern " catalogNumber$" ; + ] ; + sh:property [ + sh:path skos:definition ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:minCount 1; + sh:maxCount 1; + sh:pattern "^A catalog number for the sample$" ; + ] . + +bdrsh:RecordNumberDatatypeShape + a sh:NodeShape ; + sh:property [ + sh:path skos:prefLabel ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:pattern " recordNumber$" ; + ] ; + sh:property [ + sh:path skos:definition ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:pattern "^The record number of the original observation from the original observer of the organism$" ; + ] ; + sh:property [ + sh:path prov:wasAttributedTo ; + sh:nodeKind sh:IRI ; + sh:minCount 1 ; + sh:maxCount 1 ; + ] . 
+ +bdrsh:RecordIDDatatypeShape + a sh:NodeShape ; + sh:property [ + sh:path skos:prefLabel ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:pattern " recordID$" ; + ] ; + sh:property [ + sh:path skos:definition ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:pattern "^An identifier for the record$" ; + ] ; + sh:property [ + sh:path prov:qualifiedAttribution ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:node bdrsh:AttributionShape ; + sh:class prov:Attribution ; + ] . + +bdrsh:AttributionShape + a sh:NodeShape ; + sh:property [ + sh:path prov:agent ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:IRI ; + ] ; + sh:property [ + sh:path prov:hadRole ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:IRI ; + ] . + +bdrsh:BiodiversityRecordShape + a sh:NodeShape ; + sh:targetClass abis:BiodiversityRecord ; + sh:property [ + sh:path schema:identifier ; + sh:nodeKind sh:Literal ; + sh:minCount 1 ; + sh:maxCount 1 ; + ] ; + sh:property [ + sh:path schema:about ; + sh:nodeKind sh:IRI ; + sh:class dwc:Occurrence ; + sh:minCount 1 ; + sh:maxCount 1 ; + ] . + +bdrsh:CollectionShape + a sh:NodeShape ; + sh:targetClass schema:Collection ; + sh:xone ( + bdrsh:GeneralCollectionShape + bdrsh:SensitivityCategoryCollectionShape + ) . + +bdrsh:GeneralCollectionShape + a sh:NodeShape ; + sh:class schema:Collection ; + sh:property [ + sh:path schema:isPartOf ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:IRI ; + sh:class tern:Dataset ; + ] ; + sh:property [ + sh:path schema:name ; + sh:nodeKind sh:Literal ; + sh:pattern "^Occurrence Collection - (?:Identification Qualifier|Identification Remarks|Data Generalizations|Taxon Rank|Habitat|Basis Of Record|Preparations|Conservation Authority) - " ; + sh:datatype xsd:string ; + sh:minCount 1; + sh:maxCount 1; + ] ; + sh:property [ + sh:path tern:hasAttribute ; + sh:nodeKind sh:IRI ; + sh:class tern:Attribute; + sh:maxCount 1; + ] . 
+ +# Shape exclusively relevant for a sensitivity category collection +bdrsh:SensitivityCategoryCollectionShape + a sh:NodeShape ; + sh:class schema:Collection ; + sh:property [ + sh:path schema:isPartOf ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:IRI ; + sh:class tern:Dataset ; + ] ; + sh:property [ + sh:path schema:name ; + sh:nodeKind sh:Literal ; + sh:pattern "^Occurrence Collection - Sensitivity Category - " ; + sh:datatype xsd:string ; + sh:minCount 1; + sh:maxCount 1; + ] ; + sh:property [ + sh:path tern:hasAttribute ; + sh:nodeKind sh:IRI ; + sh:class tern:Attribute; + sh:maxCount 1; + ] ; + sh:property [ + sh:path schema:member ; + sh:nodeKind sh:IRI ; + sh:class abis:BiodiversityRecord ; + sh:minCount 1; + ] . + +bdrsh:SurveyOccurrenceShape + a sh:NodeShape ; + sh:targetClass dwc:Occurrence ; + sh:class tern:FeatureOfInterest, + dwc:Occurrence ; + sh:property [ + sh:path schema:identifier ; + sh:nodeKind sh:Literal ; + ] ; + sh:property [ + sh:path tern:featureType ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:nodeKind sh:IRI ; + ] ; + sh:property [ + sh:path schema:spatial ; + sh:nodeKind sh:BlankNode ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:class geo:Geometry ; + ] ; + sh:property [ + sh:path schema:temporal ; + sh:nodeKind sh:BlankNode ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:class time:TemporalEntity ; + ] ; + sh:property [ + sh:path sosa:usedProcedure ; + sh:nodeKind sh:IRI ; + sh:maxCount 1 ; + sh:minCount 1 ; + ] ; + sh:property [ + sh:path tern:locationDescription ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:maxCount 1 ; + ] ; + sh:property [ + sh:path prov:wasAssociatedWith ; + sh:nodeKind sh:IRI ; + sh:maxCount 1 ; + sh:class prov:Agent ; + ] ; + # dwc:Occurrence has 2 schema:isPartOf triples, one for the tern:Survey and one for the tern:Dataset: + sh:property [ + sh:path schema:isPartOf ; + sh:nodeKind sh:IRI ; + sh:minCount 2 ; + sh:maxCount 2 ; + ] ; + sh:property [ + sh:path schema:isPartOf ; + 
sh:qualifiedValueShape [ + sh:nodeKind sh:IRI ; + sh:class tern:Survey ; + ] ; + sh:qualifiedMinCount 1 ; + sh:qualifiedMaxCount 1 ; + ] ; + sh:property [ + sh:path schema:isPartOf ; + sh:qualifiedValueShape [ + sh:nodeKind sh:IRI ; + sh:class tern:Dataset ; + ] ; + sh:qualifiedMinCount 1 ; + sh:qualifiedMaxCount 1 ; + ] ; + sh:property [ + sh:path tern:hasSite ; + sh:nodeKind sh:IRI ; + sh:maxCount 1 ; + sh:class tern:Site ; + ] ; + sh:property [ + sh:path tern:hasSiteVisit ; + sh:nodeKind sh:IRI ; + sh:maxCount 1 ; + sh:class tern:SiteVisit ; + ] . diff --git a/abis_mapping/templates/survey_site_data_v3/README.md b/abis_mapping/templates/survey_site_data_v3/README.md new file mode 100644 index 00000000..bce67b50 --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/README.md @@ -0,0 +1,5 @@ +# Template Description +TBC + +# Template Instructions +See `instructions.pdf` for more details diff --git a/abis_mapping/templates/survey_site_data_v3/examples/minimal-error-duplicate-site-ids.csv b/abis_mapping/templates/survey_site_data_v3/examples/minimal-error-duplicate-site-ids.csv new file mode 100644 index 00000000..91b95810 --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/examples/minimal-error-duplicate-site-ids.csv @@ -0,0 +1,3 @@ +siteID,siteIDSource,siteType,siteName,siteDescription,habitat,relatedSiteID,relationshipToRelatedSite,locality,decimalLatitude,decimalLongitude,footprintWKT,geodeticDatum,coordinateUncertaintyInMeters,dataGeneralizations +P1,WAM,Plot,Plot 1,Fine woody debris.,,,,Cowaramup Bay Road,-34.036,146.363,"LINESTRING (146.363 -34.036, 146.363 -34.037)",WGS84,50, +P1,WAM,Plot,Plot 1,Fine woody debris.,,,,Cowaramup Bay Road,-34.036,146.363,"LINESTRING (146.363 -34.036, 146.363 -34.037)",WGS84,50, diff --git a/abis_mapping/templates/survey_site_data_v3/examples/minimal-error-missing-fields.csv b/abis_mapping/templates/survey_site_data_v3/examples/minimal-error-missing-fields.csv new file mode 100644 index 00000000..211eda4e 
--- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/examples/minimal-error-missing-fields.csv @@ -0,0 +1,2 @@ +siteID,siteIDSource,siteType,siteName,siteDescription,habitat,relatedSiteID,relationshipToRelatedSite,locality,decimalLatitude,decimalLongitude,footprintWKT,geodeticDatum,coordinateUncertaintyInMeters,dataGeneralizations +P1,WAM,Plot,Plot 1,Fine woody debris.,,,part of,Cowaramup Bay Road,-34.036,146.363,"LINESTRING (146.363 -34.036, 146.363 -34.037)","",50, diff --git a/abis_mapping/templates/survey_site_data_v3/examples/minimal.csv b/abis_mapping/templates/survey_site_data_v3/examples/minimal.csv new file mode 100644 index 00000000..411e8da1 --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/examples/minimal.csv @@ -0,0 +1,5 @@ +siteID,siteIDSource,siteType,siteName,siteDescription,habitat,relatedSiteID,relationshipToRelatedSite,locality,decimalLatitude,decimalLongitude,footprintWKT,geodeticDatum,coordinateUncertaintyInMeters,dataGeneralizations +P0,WAM,Site,ParentSite,Footprint of study area,Closed forest,,,Cowaramup Bay Road,,,"POLYGON ((114.98 -33.85, 115.01 -33.85, 115.01 -33.87, 114.98 -33.87, 114.98 -33.85))",WGS84,50, +P1,WAM,Plot,Plot 1,Fine woody debris.,Closed forest,P0,partOf,Cowaramup Bay Road,-33.85,114.99,"LINESTRING (114.99 -33.85, 115.00 -33.85)",WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern +P2,WAM,Plot,Plot 2,Fine woody debris.,Closed forest,S0,sameAs,Cowaramup Bay Road,-33.85,114.99,"LINESTRING (114.99 -33.85, 115.00 -33.85)",WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern +P3,WAM,Plot,Plot 3,Fine woody debris.,Closed forest,http://example.com/site/S0,sameAs,Cowaramup Bay Road,-33.85,114.99,"LINESTRING (114.99 -33.85, 115.00 -33.85)",WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern diff --git a/abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl b/abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl new file 
mode 100644 index 00000000..e144174a --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl @@ -0,0 +1,198 @@ +@prefix geo: . +@prefix prov: . +@prefix rdf: . +@prefix rdfs: . +@prefix schema: . +@prefix skos: . +@prefix tern: . +@prefix xsd: . + + a schema:Collection ; + schema:isPartOf ; + schema:member , + , + ; + schema:name "Site Collection - Data Generalizations - Coordinates rounded to the nearest 10 km for conservation concern" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member , + , + , + ; + schema:name "Site Collection - Habitat - Closed forest" ; + tern:hasAttribute . + + a rdfs:Datatype ; + skos:definition "An identifier for the site" ; + skos:prefLabel "WAM Site ID" ; + prov:qualifiedAttribution . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Coordinates rounded to the nearest 10 km for conservation concern" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "Closed forest" ; + tern:hasValue . + + a prov:Attribution ; + prov:agent ; + prov:hadRole . + + a tern:Text, + tern:Value ; + rdf:value "Coordinates rounded to the nearest 10 km for conservation concern" . + + a tern:IRI, + tern:Value ; + rdfs:label "Closed forest" ; + rdf:value . + + a prov:Agent ; + schema:name "WAM" . + + a tern:Site ; + geo:hasGeometry _:N9466fd6e9e4c9aa92b83d28000000000 ; + schema:additionalType ; + schema:description "Footprint of study area" ; + schema:identifier "P0"^^ ; + schema:isPartOf ; + schema:name "ParentSite" ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a tern:Site ; + geo:hasGeometry _:N3bba75fe5be4a400a5af80dd00000000, + _:N9466fd6e9e4c9aa92b83d28000000001 ; + schema:additionalType ; + schema:description "Fine woody debris." ; + schema:identifier "P1"^^ ; + schema:isPartOf , + ; + schema:name "Plot 1" ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . 
+ + a tern:Site ; + geo:hasGeometry _:N3bba75fe5be4a400a5af80dd00000001, + _:N9466fd6e9e4c9aa92b83d28000000002 ; + schema:additionalType ; + schema:description "Fine woody debris." ; + schema:identifier "P2"^^ ; + schema:isPartOf ; + schema:name "Plot 2" ; + schema:sameAs "S0" ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a tern:Site ; + geo:hasGeometry _:N3bba75fe5be4a400a5af80dd00000002, + _:N9466fd6e9e4c9aa92b83d28000000003 ; + schema:additionalType ; + schema:description "Fine woody debris." ; + schema:identifier "P3"^^ ; + schema:isPartOf ; + schema:name "Plot 3" ; + schema:sameAs "http://example.com/site/S0"^^xsd:anyURI ; + tern:featureType ; + tern:locationDescription "Cowaramup Bay Road" . + + a tern:Dataset . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POLYGON ((-33.85 114.98, -33.85 115.01, -33.87 115.01, -33.87 114.98, -33.85 114.98))"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 ] ; + rdf:object _:N9466fd6e9e4c9aa92b83d28000000000 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " LINESTRING (-33.85 114.99, -33.85 115)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 ] ; + rdf:object _:N9466fd6e9e4c9aa92b83d28000000001 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " LINESTRING (-33.85 114.99, -33.85 115)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 ] ; + rdf:object _:N9466fd6e9e4c9aa92b83d28000000002 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . 
+ +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " LINESTRING (-33.85 114.99, -33.85 115)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 ] ; + rdf:object _:N9466fd6e9e4c9aa92b83d28000000003 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.85 114.99)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 ] ; + rdf:object _:N3bba75fe5be4a400a5af80dd00000000 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.85 114.99)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 ] ; + rdf:object _:N3bba75fe5be4a400a5af80dd00000001 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +[] a rdf:Statement ; + geo:hasGeometry [ a geo:Geometry ; + geo:asWKT " POINT (-33.85 114.99)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 ] ; + rdf:object _:N3bba75fe5be4a400a5af80dd00000002 ; + rdf:predicate geo:hasGeometry ; + rdf:subject ; + rdfs:comment "supplied as" . + +_:N3bba75fe5be4a400a5af80dd00000000 a geo:Geometry ; + geo:asWKT " POINT (-33.85 114.99)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 . + +_:N3bba75fe5be4a400a5af80dd00000001 a geo:Geometry ; + geo:asWKT " POINT (-33.85 114.99)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 . + +_:N3bba75fe5be4a400a5af80dd00000002 a geo:Geometry ; + geo:asWKT " POINT (-33.85 114.99)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 . + +_:N9466fd6e9e4c9aa92b83d28000000000 a geo:Geometry ; + geo:asWKT " POLYGON ((-33.85 114.98, -33.85 115.01, -33.87 115.01, -33.87 114.98, -33.85 114.98))"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 . 
+ +_:N9466fd6e9e4c9aa92b83d28000000001 a geo:Geometry ; + geo:asWKT " LINESTRING (-33.85 114.99, -33.85 115)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 . + +_:N9466fd6e9e4c9aa92b83d28000000002 a geo:Geometry ; + geo:asWKT " LINESTRING (-33.85 114.99, -33.85 115)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 . + +_:N9466fd6e9e4c9aa92b83d28000000003 a geo:Geometry ; + geo:asWKT " LINESTRING (-33.85 114.99, -33.85 115)"^^geo:wktLiteral ; + geo:hasMetricSpatialAccuracy 5e+01 . + diff --git a/abis_mapping/templates/survey_site_data_v3/mapping.py b/abis_mapping/templates/survey_site_data_v3/mapping.py new file mode 100644 index 00000000..24a1db61 --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/mapping.py @@ -0,0 +1,854 @@ +"""Provides ABIS Mapper for `survey_site_data-v3.0.0.csv` template.""" + +# Standard +import dataclasses +import decimal + +# Third-party +import rdflib +import frictionless +import frictionless.checks +import shapely +import shapely.geometry + +# Local +from abis_mapping import base +from abis_mapping import plugins +from abis_mapping import models +from abis_mapping import utils +from abis_mapping import vocabs + +# Typing +from typing import Any, Literal + + +# Constants and shortcuts +a = rdflib.RDF.type +HABITAT = rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/2090cfd9-8b6b-497b-9512-497456a18b99") +CONCEPT_DATA_GENERALIZATIONS = utils.rdf.uri("concept/data-generalizations", utils.namespaces.EXAMPLE) +DATA_ROLE_RESOURCE_PROVIDER = rdflib.URIRef("https://linked.data.gov.au/def/data-roles/resourceProvider") + + +# Dataclasses used in mapping +@dataclasses.dataclass +class AttributeValue: + """Contains data items to enable producing attribute, value and collection nodes""" + + raw: str + attribute: rdflib.URIRef + value: rdflib.URIRef + collection: rdflib.URIRef + + +@dataclasses.dataclass +class Agent: + """Contains data items to enable producing agent nodes""" + + raw: str + uri: rdflib.URIRef + + +class 
SurveySiteMapper(base.mapper.ABISMapper): + """ABIS Mapper for `survey_site_data.csv` v3""" + + def apply_validation( + self, + data: base.types.ReadableType, + **kwargs: Any, + ) -> frictionless.Report: + """Applies Frictionless Validation for the `survey_site_data.csv` Template + + Args: + data (base.types.ReadableType): Raw data to be validated. + **kwargs (Any): Additional keyword arguments. + + Keyword Args: + site_id_map (dict[str, bool]): Site ids present in the occurrence template. + + Returns: + frictionless.Report: Validation report for the specified data. + """ + # Extract keyword arguments + site_id_map: dict[str, bool] = kwargs.get("site_id_map", {}) + + # Construct schema + schema = self.extra_fields_schema( + data=data, + full_schema=True, + ) + + # Construct resource + resource = frictionless.Resource( + source=data, + format="csv", + schema=schema, + encoding="utf-8", + ) + + # Validate + report = resource.validate( + checklist=frictionless.Checklist( + checks=[ + # Extra custom checks + plugins.tabular.IsTabular(), + plugins.empty.NotEmpty(), + plugins.sites_geometry.SitesGeometry( + occurrence_site_ids=set(site_id_map), + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["relatedSiteID", "relationshipToRelatedSite"], + ), + # When relationshipToRelatedSite is 'partOf' + # then the relatedSiteID must exist as a siteID in the same template. + plugins.related_site_id_part_of_lookup.RelatedSiteIDPartOfLookup( + site_ids=set(self.extract_site_ids(data)) + ), + ], + ) + ) + + # Return validation report + return report + + def extract_site_ids( + self, + data: base.types.ReadableType, + ) -> dict[str, Literal[True]]: + """Constructs a key mapped 'set' of all ids. 
+ + Args: + data: Raw data to be mapped + """ + # Construct schema + schema = frictionless.Schema.from_descriptor(self.schema()) + + # Construct resource + resource = frictionless.Resource(source=data, format="csv", schema=schema, encoding="utf-8") + + with resource.open() as r: + # Create empty dictionary to hold mapping values + result: dict[str, Literal[True]] = {} + for row in r.row_stream: + # Extract value + site_id: str | None = row["siteID"] + + if site_id: + result[site_id] = True + + return result + + def extract_geometry_defaults( + self, + data: base.types.ReadableType, + ) -> dict[str, str]: + """Constructs a dictionary mapping site id to default WKT. + + The resulting string WKT returned can then be used as the missing + geometry for other related templates i.e. the site occurrences + + Args: + data (base.types.ReadableType): Raw data to be mapped. + + Returns: + dict[str, str]: Keys are the site id; values are the + appropriate point WKT serialized string. If none then + there is no siteID key created. Values include the geodetic + datum uri. + """ + # Construct schema + schema = frictionless.Schema.from_descriptor(self.schema()) + + # Construct resource + resource = frictionless.Resource( + source=data, + format="csv", + schema=schema, + encoding="utf-8", + ) + + # Context manager for row streaming + with resource.open() as r: + # Create empty dictionary to hold mapping values + result: dict[str, str] = {} + for row in r.row_stream: + # Extract values + site_id: str | None = row["siteID"] + + # Check for siteID, even though siteID is a mandatory field, it can be missing here + # because this method is called for cross-validation, regardless of if this template is valid. 
+ if not site_id: + continue + + footprint_wkt: shapely.geometry.base.BaseGeometry | None = row["footprintWKT"] + longitude: decimal.Decimal | None = row["decimalLongitude"] + latitude: decimal.Decimal | None = row["decimalLatitude"] + datum: str | None = row["geodeticDatum"] + + # if no valid datum for row then don't add to map. + if datum is None: + continue + + try: + # Default to using the footprint wkt + geodetic datum + if footprint_wkt is not None: + # Create string and add to map for site id + result[site_id] = str( + models.spatial.Geometry( + raw=footprint_wkt.centroid, + datum=datum, + ).to_rdf_literal() + ) + continue + + # If not footprint then we revert to using supplied longitude & latitude + if longitude is not None and latitude is not None: + # Create string and add to map for site id + result[site_id] = str( + models.spatial.Geometry( + raw=shapely.Point([float(longitude), float(latitude)]), + datum=datum, + ).to_rdf_literal() + ) + except models.spatial.GeometryError: + continue + + return result + + def apply_mapping_row( + self, + *, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + extra_schema: frictionless.Schema, + base_iri: rdflib.Namespace, + **kwargs: Any, + ) -> None: + """Applies mapping for a row in the `survey_site_data.csv` template. + + Args: + row (frictionless.Row): Row to be processed in the dataset. + dataset (rdflib.URIRef): Dataset IRI this row is a part of. + graph (rdflib.URIRef): Graph to map row into. + extra_schema (frictionless.Schema): Schema of extra fields. + base_iri (rdflib.Namespace): Optional base IRI to use for mapping. + """ + # TERN.Site subject IRI - Note this needs to match the iri construction of the + # survey site visit and occurrence template mapping, ensuring they will resolve properly. 
+ site_id: str = row["siteID"] + site = utils.iri_patterns.site_iri(base_iri, site_id) + + # Conditionally create uris dependent on siteIDSource + site_id_src: str | None = row["siteIDSource"] + if site_id_src: + site_id_datatype = utils.iri_patterns.datatype_iri("siteID", site_id_src) + site_id_agent = utils.iri_patterns.agent_iri(site_id_src) + site_id_attribution = utils.iri_patterns.attribution_iri(base_iri, "resourceProvider", site_id_src) + else: + site_id_datatype = None + site_id_agent = None + site_id_attribution = None + + # Conditionally create uri dependent on relatedSiteID + related_site_id: str | None = row["relatedSiteID"] + relationship_to_related_site: str | None = row["relationshipToRelatedSite"] + related_site: rdflib.URIRef | rdflib.Literal | None + if related_site_id and relationship_to_related_site: + # Get vocab to conditionally create related site + rtor_site_vocab = self.fields()["relationshipToRelatedSite"].get_vocab() + if rtor_site_vocab().get(relationship_to_related_site) == rdflib.SDO.isPartOf: + # Related site is defined internal to the dataset + related_site = utils.iri_patterns.site_iri(base_iri, related_site_id) + else: + # Related site is defined outside the dataset + related_site = utils.rdf.uri_or_string_literal(related_site_id) + else: + related_site = None + + # Conditionally create uris dependent on dataGeneralizations + data_generalizations: str | None = row["dataGeneralizations"] + if data_generalizations: + data_generalizations_attribute = utils.iri_patterns.attribute_iri( + base_iri, "dataGeneralizations", data_generalizations + ) + data_generalizations_value = utils.iri_patterns.attribute_value_iri( + base_iri, "dataGeneralizations", data_generalizations + ) + data_generalizations_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "Site", "dataGeneralizations", data_generalizations + ) + else: + data_generalizations_attribute = None + data_generalizations_value = None + data_generalizations_collection 
= None + + # Create habitat attribute and value objects + habitat_objects: list[AttributeValue] = [] + if habitats := row["habitat"]: + for habitat in habitats: + habitat_objects.append( + AttributeValue( + raw=habitat, + attribute=utils.iri_patterns.attribute_iri(base_iri, "habitat", habitat), + value=utils.iri_patterns.attribute_value_iri(base_iri, "habitat", habitat), + collection=utils.iri_patterns.attribute_collection_iri(base_iri, "Site", "habitat", habitat), + ) + ) + + # Add site + self.add_site( + uri=site, + dataset=dataset, + site_id_datatype=site_id_datatype, + related_site=related_site, + row=row, + graph=graph, + base_iri=base_iri, + ) + + # Add site id datatype + self.add_site_id_datatype( + uri=site_id_datatype, + attribution=site_id_attribution, + row=row, + graph=graph, + ) + + # Add site id attribution + self.add_site_id_attribution( + uri=site_id_attribution, + agent=site_id_agent, + graph=graph, + ) + + # Add site id agent + self.add_site_id_agent( + uri=site_id_agent, + row=row, + graph=graph, + ) + + # Iterate through habitat objects + for habitat_object in habitat_objects: + # Add habitat attribute + self.add_habitat_attribute( + uri=habitat_object.attribute, + value=habitat_object.value, + dataset=dataset, + raw=habitat_object.raw, + graph=graph, + ) + + # Add habitat value + self.add_habitat_value( + uri=habitat_object.value, + dataset=dataset, + raw=habitat_object.raw, + graph=graph, + base_iri=base_iri, + ) + + # Add habitat attribute Collection + self.add_habitat_collection( + uri=habitat_object.collection, + raw_habitat_value=habitat_object.raw, + attribute=habitat_object.attribute, + site=site, + dataset=dataset, + graph=graph, + ) + + # Add data generalizations attribute + self.add_data_generalizations_attribute( + uri=data_generalizations_attribute, + value=data_generalizations_value, + dataset=dataset, + row=row, + graph=graph, + ) + + # Add data generalizations value + self.add_data_generalizations_value( + 
uri=data_generalizations_value, + row=row, + graph=graph, + ) + + # Add data generalizations attribute Collection + self.add_data_generalizations_collection( + uri=data_generalizations_collection, + raw_data_generalizations_value=data_generalizations, + attribute=data_generalizations_attribute, + site=site, + dataset=dataset, + graph=graph, + ) + + # Add geometry + self.add_footprint_geometry( + uri=site, + row=row, + graph=graph, + ) + + self.add_point_geometry( + uri=site, + row=row, + graph=graph, + ) + + # Add extra fields + self.add_extra_fields_json( + subject_uri=site, + row=row, + graph=graph, + extra_schema=extra_schema, + ) + + def add_site( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + site_id_datatype: rdflib.URIRef | None, + related_site: rdflib.URIRef | rdflib.Literal | None, + row: frictionless.Row, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds site to the graph. + + Args: + uri: URI to use for this node. + dataset: Dataset to which data belongs. + site_id_datatype: Datatype to use for + the site id literal. + related_site: Either the internal site uri that + this site relates to or a literal representation + from outside the dataset + row: Row to retrieve data from. + graph: Graph to be modified. 
+ base_iri: Namespace used to construct IRIs + """ + # Extract relevant values + site_id = row["siteID"] + site_name = row["siteName"] + site_type = row["siteType"] + site_description = row["siteDescription"] + locality = row["locality"] + + # Add type + graph.add((uri, a, utils.namespaces.TERN.Site)) + + # Add dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add siteID + dt = site_id_datatype if site_id_datatype is not None else rdflib.XSD.string + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(site_id, datatype=dt))) + + # Add related site if provided + if related_site is not None and (relationship_to_related_site := row["relationshipToRelatedSite"]): + # Retrieve vocab for field + relationship_to_related_site_vocab = self.fields()["relationshipToRelatedSite"].get_vocab() + # Retrieve term + relationship_to_related_site_term = relationship_to_related_site_vocab().get(relationship_to_related_site) + graph.add((uri, relationship_to_related_site_term, related_site)) + + # Add site tern featuretype + graph.add((uri, utils.namespaces.TERN.featureType, vocabs.site_type.SITE.iri)) + + if site_type: + # Retrieve vocab for field + site_type_vocab = self.fields()["siteType"].get_flexible_vocab() + + # Retrieve term or create on the fly + site_type_term = site_type_vocab(graph=graph, source=dataset, base_iri=base_iri).get(site_type) + + # Add to site type graph + graph.add((uri, rdflib.SDO.additionalType, site_type_term)) + + # Add site name if available + if site_name: + graph.add((uri, rdflib.SDO.name, rdflib.Literal(site_name))) + + # Add site description if available + if site_description: + graph.add((uri, rdflib.SDO.description, rdflib.Literal(site_description))) + + # Add locality as location description + if locality: + graph.add((uri, utils.namespaces.TERN.locationDescription, rdflib.Literal(locality))) + + def add_site_id_datatype( + self, + uri: rdflib.URIRef | None, + attribution: rdflib.URIRef | None, + row: frictionless.Row, + graph: 
rdflib.Graph, + ) -> None: + """Adds site id datatype to the graph. + + Args: + uri (rdflib.URIRef | None): Subject of the node. + attribution (rdflib.URIRef | None): Attribution that the datatype corresponds to. + row (frictionless.Row): Raw data. + graph (rdflib.Graph): Graph to be modified. + """ + # Check subject provided + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.RDFS.Datatype)) + + # Add label + graph.add((uri, rdflib.SKOS.prefLabel, rdflib.Literal(f"{row['siteIDSource']} Site ID"))) + + # Add definition + graph.add((uri, rdflib.SKOS.definition, rdflib.Literal("An identifier for the site"))) + + # Add attribution + if attribution is not None: + graph.add((uri, rdflib.PROV.qualifiedAttribution, attribution)) + + def add_site_id_attribution( + self, + uri: rdflib.URIRef | None, + agent: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds site id attribution to the graph. + + Args: + uri (rdflib.URIRef | None): Subject of the node. + agent (rdflib.URIRef | None): Agent that the attribution corresponds to. + graph (rdflib.Graph): Graph to be modified. + """ + # Check subject provided + if uri is None: + return + + # Add attribution + graph.add((uri, a, rdflib.PROV.Attribution)) + + # Add agent + if agent is not None: + graph.add((uri, rdflib.PROV.agent, agent)) + + # Add hadRole + graph.add((uri, rdflib.PROV.hadRole, DATA_ROLE_RESOURCE_PROVIDER)) + + def add_site_id_agent( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds the site id agent to the graph. + + Args: + uri (rdflib.URIRef | None): Subject of the node. + row (frictionless.Row): Raw data. + graph (rdflib.Graph): Graph to be modified. 
+ """ + # Check subject provided + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + + # Add name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row["siteIDSource"]))) + + def add_habitat_attribute( + self, + uri: rdflib.URIRef, + value: rdflib.URIRef, + dataset: rdflib.URIRef, + raw: str, + graph: rdflib.Graph, + ) -> None: + """Adds a habitat attribute to the graph. + + Args: + uri (rdflib.URIRef): Subject of the node. + value (rdflib.URIRef): Corresponding value reference. + dataset (rdflib.URIRef): Dataset raw data belongs. + raw (str): Raw data. + graph (rdflib.Graph): Graph to be modified. + """ + # Add type + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + + # Add dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add tern values + graph.add((uri, utils.namespaces.TERN.attribute, HABITAT)) + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(raw))) + graph.add((uri, utils.namespaces.TERN.hasValue, value)) + + def add_habitat_value( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + raw: str, + graph: rdflib.Graph, + ) -> None: + """Add a habitat value node to graph. + + Args: + uri (rdflib.URIRef): Subject of the node. + dataset (rdflib.URIRef): Dataset data belongs. + raw (str): Raw data provided. + graph (rdflib.Graph): Graph to be modified. 
+ base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + # Add type + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + # Add label + graph.add((uri, rdflib.RDFS.label, rdflib.Literal(raw))) + + # Retrieve vocab for field + vocab = self.fields()["habitat"].get_flexible_vocab() + + # Add flexible vocab term + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(raw) + graph.add((uri, rdflib.RDF.value, term)) + + def add_habitat_collection( + self, + uri: rdflib.URIRef, + raw_habitat_value: str, + attribute: rdflib.URIRef, + site: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a habitat attribute Collection to the graph + + Args: + uri: The uri for the Collection. + raw_habitat_value: Habitat value from template. + attribute: The uri for the attribute node. + site: The uri for the site node. + dataset: The uri for the dataset node. + graph: The graph. + """ + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + graph.add((uri, rdflib.SDO.name, rdflib.Literal(f"Site Collection - Habitat - {raw_habitat_value}"))) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to this site + graph.add((uri, rdflib.SDO.member, site)) + # Add link to attribute + graph.add((uri, utils.namespaces.TERN.hasAttribute, attribute)) + + def add_data_generalizations_attribute( + self, + uri: rdflib.URIRef | None, + value: rdflib.URIRef | None, + dataset: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Add the data generalizations attribute node to the graph. + + Args: + uri (rdflib.URIRef | None): Subject of the node. + value (rdflib.URIRef | None): Corresponding value. + dataset (rdflib.URIRef): Corresponding dataset data belongs. + row (frictionless.Row): Raw data. + graph (rdflib.Graph): Graph to be modified. 
+ """ + # Check subject provided + if uri is None: + return + + # Add type + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + + # Add dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add tern values + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_DATA_GENERALIZATIONS)) + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(row["dataGeneralizations"]))) + if value is not None: + graph.add((uri, utils.namespaces.TERN.hasValue, value)) + + def add_data_generalizations_value( + self, + uri: rdflib.URIRef | None, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Add data generalizations value node to graph. + + Args: + uri (rdflib.URIRef | None): Subject of the node. + row (frictionless.Row): Raw data. + graph (rdflib.Graph): Graph to be modified. + """ + # Check subject provided + if uri is None: + return + + # Add type + graph.add((uri, a, utils.namespaces.TERN.Text)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + # Add raw value + graph.add((uri, rdflib.RDF.value, rdflib.Literal(row["dataGeneralizations"]))) + + def add_data_generalizations_collection( + self, + uri: rdflib.URIRef | None, + raw_data_generalizations_value: str | None, + attribute: rdflib.URIRef | None, + site: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a Data Generalizations attribute Collection to the graph + + Args: + uri: The uri for the Collection. + raw_data_generalizations_value: DataGeneralizations value from template. + attribute: The uri for the attribute node. + site: The uri for the site node. + dataset: The uri for the dataset node. + graph: The graph. 
+ """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if raw_data_generalizations_value: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Site Collection - Data Generalizations - {raw_data_generalizations_value}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to this site + graph.add((uri, rdflib.SDO.member, site)) + # Add link to attribute + if attribute is not None: + graph.add((uri, utils.namespaces.TERN.hasAttribute, attribute)) + + def add_footprint_geometry( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds footprint geometry details to the graph. + + Args: + uri (rdflib.URIRef): URI to attach. + row (frictionless.Row): Row to retrieve data from. + graph (rdflib.Graph): Graph to be modified. + """ + # Extract values + geodetic_datum = row["geodeticDatum"] + footprint_wkt = row["footprintWKT"] + + if footprint_wkt is None or geodetic_datum is None: + return + + # Construct geometry + geometry = models.spatial.Geometry( + raw=footprint_wkt, + datum=geodetic_datum, + ) + + # Construct node + geometry_node = rdflib.BNode() + graph.add((geometry_node, a, utils.namespaces.GEO.Geometry)) + graph.add((geometry_node, utils.namespaces.GEO.asWKT, geometry.to_transformed_crs_rdf_literal())) + graph.add((uri, utils.namespaces.GEO.hasGeometry, geometry_node)) + + # Add coordinate uncertainty if available + coordinate_uncertainty = row["coordinateUncertaintyInMeters"] + if coordinate_uncertainty: + spatial_accuracy = rdflib.Literal(coordinate_uncertainty, datatype=rdflib.XSD.double) + graph.add((geometry_node, utils.namespaces.GEO.hasMetricSpatialAccuracy, spatial_accuracy)) + else: + spatial_accuracy = None + + # Add original geometry supplied as statement + self.add_geometry_supplied_as( + subj=uri, + pred=utils.namespaces.GEO.hasGeometry, + obj=geometry_node, + geom=geometry, + graph=graph, + 
spatial_accuracy=spatial_accuracy, + ) + + def add_point_geometry( + self, + uri: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds site point geometry details to the graph. + + Args: + uri (rdflib.URIRef): URI to attach. + row (frictionless.Row): Row to retrieve data from. + graph (rdflib.Graph): Graph to be modified. + """ + # Extract values + decimal_latitude = row["decimalLatitude"] + decimal_longitude = row["decimalLongitude"] + geodetic_datum = row["geodeticDatum"] + + if decimal_latitude is None or decimal_longitude is None or geodetic_datum is None: + return + + # Construct geometry + geometry = models.spatial.Geometry( + raw=models.spatial.LatLong(decimal_latitude, decimal_longitude), + datum=geodetic_datum, + ) + + # Construct node + geometry_node = rdflib.BNode() + graph.add((geometry_node, a, utils.namespaces.GEO.Geometry)) + graph.add((geometry_node, utils.namespaces.GEO.asWKT, geometry.to_transformed_crs_rdf_literal())) + graph.add((uri, utils.namespaces.GEO.hasGeometry, geometry_node)) + + # Add coordinate uncertainty if available + coordinate_uncertainty = row["coordinateUncertaintyInMeters"] + if coordinate_uncertainty: + spatial_accuracy = rdflib.Literal(coordinate_uncertainty, datatype=rdflib.XSD.double) + graph.add((geometry_node, utils.namespaces.GEO.hasMetricSpatialAccuracy, spatial_accuracy)) + else: + spatial_accuracy = None + + # Add original geometry supplied as statement + self.add_geometry_supplied_as( + subj=uri, + pred=utils.namespaces.GEO.hasGeometry, + obj=geometry_node, + geom=geometry, + graph=graph, + spatial_accuracy=spatial_accuracy, + ) + + +# Register Mapper +base.mapper.register_mapper(SurveySiteMapper) diff --git a/abis_mapping/templates/survey_site_data_v3/metadata.json b/abis_mapping/templates/survey_site_data_v3/metadata.json new file mode 100644 index 00000000..1ccbc1e6 --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/metadata.json @@ -0,0 +1,13 @@ +{ + "name": 
"survey_site_data", + "label": "Systematic Survey Site Data Template", + "version": "3.0.0", + "description": "A template for systematic survey site data", + "biodiversity_type": "Systematic Survey Site Data", + "spatial_type": "Point, line, polygon", + "file_type": "CSV", + "sampling_type": "systematic survey", + "template_url": "https://raw.githubusercontent.com/gaiaresources/abis-mapping/main/abis_mapping/templates/survey_site_data_v3/survey_site_data.csv", + "schema_url": "https://raw.githubusercontent.com/gaiaresources/abis-mapping/main/abis_mapping/templates/survey_site_data_v3/schema.json", + "template_lifecycle_status": "beta" +} diff --git a/abis_mapping/templates/survey_site_data_v3/schema.json b/abis_mapping/templates/survey_site_data_v3/schema.json new file mode 100644 index 00000000..bb36837a --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/schema.json @@ -0,0 +1,217 @@ +{ + "fields": [ + { + "name": "siteID", + "title": "Site ID", + "description": "A unique within dataset string identifier for the site. 
Valid values include strings that are used specifically for this survey or URIs from BDR Sites that have been established in previous surveys.", + "example": "P1", + "type": "string", + "format": "default", + "constraints": { + "required": true, + "unique": true + } + }, + { + "name": "siteIDSource", + "title": "Site ID Source", + "description": "The organisation that assigned the SiteID to this Site", + "example": "TERN", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteType", + "title": "Site Type", + "description": "The type of site that relates to its sampling type and/or dimensions.", + "example": "Plot", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "SITE_TYPE" + ] + }, + { + "name": "siteName", + "title": "Site Name", + "description": "A name for the site that may be more descriptive than the siteID.", + "example": "Plot 1", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteDescription", + "title": "Site Description", + "description": "The site (plot) description covers important aspects of the site (generally of the land surface). Some overlap in collected information does occur due to the modular nature of the survey processes. 
The description provides significant background information to gain an appreciation of the plot history, topography, position in the landscape and for understanding the likely relationship between the soils, vegetation and fauna.", + "example": "Fine woody debris.", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "habitat", + "title": "Habitat", + "description": "A collection of habitat types representing the dominant vegetation structural formation class adopted by the National Vegetation Information System (NVIS).", + "example": "Chenopod Shrubland | Closed Fernland", + "type": "list", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:habitat", + "constraints": { + "required": false + }, + "vocabularies": [ + "TARGET_HABITAT_SCOPE" + ] + }, + { + "name": "relatedSiteID", + "title": "Related SiteID", + "description": "Identifier of a related site to the specified site e.g. parent site, same site with different identifier.", + "example": "Same as within dataset or existing URI", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "relationshipToRelatedSite", + "title": "Relationship To Related Site", + "description": "Relationship between the site and the related site. 
This field can be used to record Site identifiers for the same site from different custodians through the use of URIs.", + "example": "Same as within dataset or existing URI", + "type": "string", + "format": "default", + "constraints": { + "required": false, + "enum": [ + "partOf", + "sameAs", + "part of", + "same as", + "Part of", + "Same as" + ] + }, + "vocabularies": [ + "RELATIONSHIP_TO_RELATED_SITE" + ] + }, + { + "name": "locality", + "title": "Locality", + "description": "The specific description of the place.", + "example": "Cowaramup Bay Road", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:locality", + "constraints": { + "required": false + } + }, + { + "name": "decimalLatitude", + "title": "Decimal Latitude", + "description": "The geographic latitude (in decimal degrees, using the spatial reference system given in geodeticDatum) of the geographic origin of a Site. Positive values are north of the Equator, negative values are south of it. Legal values lie between -90 and 0, inclusive for Southern hemisphere.", + "example": "-34.036", + "type": "number", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:decimalLatitude", + "constraints": { + "required": false, + "minimum": -90, + "maximum": 0 + } + }, + { + "name": "decimalLongitude", + "title": "Decimal Longitude", + "description": "The geographic longitude (in decimal degrees, using the spatial reference system given in geodeticDatum) of the geographic origin of a Site. Positive values are east of the Greenwich Meridian, negative values are west of it. 
Legal values lie between 0 and 180, inclusive for the BDR use case.", + "example": "146.363", + "type": "number", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:decimalLongitude", + "constraints": { + "required": false, + "minimum": 0, + "maximum": 180 + } + }, + { + "name": "footprintWKT", + "title": "Footprint WKT", + "description": "A Well-Known Text (WKT) representation of the shape (footprint, geometry) that defines the Site. A Site may have both a point-radius representation and a footprint representation, and they may differ from each other.", + "example": "LINESTRING (146.363 -34.036, 146.363 -34.037)", + "type": "wkt", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:footprintWKT", + "constraints": { + "required": false + } + }, + { + "name": "geodeticDatum", + "title": "Geodetic Datum", + "description": "The geodetic datum, or spatial reference system (SRS) upon which the geographic coordinates given for the Site are based.", + "example": "WGS84", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:geodeticDatum", + "constraints": { + "required": false, + "enum": [ + "AGD66", + "EPSG:4202", + "AGD84", + "EPSG:4203", + "GDA2020", + "EPSG:7844", + "GDA94", + "EPSG:4283", + "WGS84", + "EPSG:4326" + ] + }, + "vocabularies": [ + "GEODETIC_DATUM" + ] + }, + { + "name": "coordinateUncertaintyInMeters", + "title": "Coordinate Uncertainty in Meters", + "description": "The horizontal distance (in metres) from the given decimalLatitude and decimalLongitude describing the smallest circle containing the whole of the Site. Leave the value empty if the uncertainty is unknown, cannot be estimated, or is not applicable (because there are no coordinates). 
Zero is not a valid value for this term.", + "example": "50", + "type": "integer", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:coordinateUncertaintyInMeters", + "constraints": { + "required": false, + "minimum": 1 + } + }, + { + "name": "dataGeneralizations", + "title": "Data Generalizations", + "description": "Actions taken to make the shared data less specific or complete than in its original form.", + "example": "Coordinates given in decimalLatitude, decimalLongitude, easting and northing have been rounded to 0.1 DEG. The observer name has been changed to a unique User ID.", + "type": "string", + "format": "default", + "url": "https://dwc.tdwg.org/terms/#dwc:dataGeneralizations", + "constraints": { + "required": false + } + } + ], + "primaryKey": "siteID" +} + diff --git a/abis_mapping/templates/survey_site_data_v3/survey_site_data.csv b/abis_mapping/templates/survey_site_data_v3/survey_site_data.csv new file mode 100644 index 00000000..76b423e1 --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/survey_site_data.csv @@ -0,0 +1 @@ +siteID,siteIDSource,siteType,siteName,siteDescription,habitat,relatedSiteID,relationshipToRelatedSite,locality,decimalLatitude,decimalLongitude,footprintWKT,geodeticDatum,coordinateUncertaintyInMeters,dataGeneralizations diff --git a/abis_mapping/templates/survey_site_data_v3/templates/instructions.md b/abis_mapping/templates/survey_site_data_v3/templates/instructions.md new file mode 100644 index 00000000..fb62bca3 --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/templates/instructions.md @@ -0,0 +1,173 @@ +{% extends "BASE_TEMPLATE base.md" %} +{% block body %} +# SYSTEMATIC SURVEY SITE DATA TEMPLATE INSTRUCTIONS + +## Intended Usage +This Systematic Survey Site Data template should be used to record data about a +Site area where species occurrences have been sampled during a systematic survey. 
+ +This Systematic Survey Site template **must be used in combination** with the +`Systematic Survey Occurrence Data` template and the `Systematic Survey Metadata` template, +and in some cases the `Systematic Survey Site Visit` template. + +Templates have been provided to facilitate integration of data into the Biodiversity Data +Repository (BDR) database. Not all types of data have been catered for in the available +templates at this stage - if you are unable to find a suitable template, please +contact to make us aware of your data needs. + +## Data Validation Requirements: +For data validation, you will need your data file to: + +- be the correct **file format**, +- have **fields that match the template downloaded** (do not remove, or + change the order of fields), +- have extant values for **mandatory fields** (see Table 1), and +- comply with all **data value constraints**; for example the geographic coordinates are + consistent with a [geodeticDatum](#geodeticDatum-vocabularies) type of the + ***{{values.geodetic_datum_count}}*** available options. + +Additional fields may be added **after the templated fields** (noting that the data type +is not assumed and values will be encoded as strings). + +### FILE FORMAT +- The systematic survey site data template is a [UTF-8](#appendix-iv-utf-8) encoded csv (not Microsoft + Excel Spreadsheets). Be sure to save this file with your data as a .csv (UTF-8) as follows, + otherwise it will not pass the csv validation step upon upload. +
`[MS Excel: Save As > More options > Tools > Web options > Save this document as > + Unicode (UTF-8)]`
+ otherwise it will not pass the csv validation step upon upload. +- **Do not include empty rows**. + +### FILE NAME + +When making a manual submission to the Biodiversity Data Repository, +the file name must include the version number +of this biodiversity data template (`v{{ metadata.version }}`). +The following format is an example of a valid file name: + +`data_description-v{{ metadata.version }}-additional_description.csv` + +where: + +* `data_description`: A short description of the data (e.g. `survey_sites`, `test_data`). +* `v{{ metadata.version }}`: The version number of this template. +* `additional_description`: (Optional) Additional description of the data, if needed (e.g. `test_data`). +* `.csv`: Ensure the file name ends with `.csv`. + +For example, `survey_sites-v{{ metadata.version }}-test_data.csv` or `test_data-v{{ metadata.version }}.csv` + +### FILE SIZE +MS Excel imposes a limit of 1,048,576 rows on a spreadsheet, limiting a CSV file to the +header row followed by 1,048,575 occurrences. Furthermore, MS Excel has a 32,767 character +limit on individual cells in a spreadsheet. These limits may be overcome by using or +editing CSV files with other software. + +Larger datasets may be more readily ingested using the API interface. Please contact + to make us aware of your data needs. + +## TEMPLATE FIELDS +The template contains the field names in the top row. Table 1 will assist you in transferring +your data to the template indicating: + +- **Field name** in the template (and an external link to the [Darwin Core standard](https://dwc.tdwg.org/terms/) + for that field where relevant); +- **Description** of the field; +- **Required** i.e. 
whether the field is **mandatory, +conditionally mandatory, or optional**; +- **Format** (datatype) required for the data values for example text (string), number + (integer, float), or date; +- **Example** of an entry or entries for that field; and +- **[Vocabulary links](#appendix-i-vocabulary-list)** within this document (for example pick list values) where + relevant. The fields that have suggested values options for the fields in Table 1 are + listed in Table 2 in alphabetical order of the field name. + +### ADDITIONAL FIELDS +Data that does not match the existing template fields may be added as additional columns in +the CSV files after the templated fields. +For example, `fieldNotes`, `continent`, `country`, `countryCode`, `stateProvince`, `georeferencedDate`, +`landformPattern`, `landformElement`, `aspect`, `slope`. + +Table 1: Systematic Survey Site data template fields with descriptions, conditions, datatype format, and examples. + +{{tables.fields}} + +## CHANGELOG + +No changes from Systematic Survey Site Data Template v2.0.0 + +## APPENDICES +### APPENDIX-I: Vocabulary List +With the exception of `geodeticDatum` and `relationshipToRelatedSite`, the data validation +does not require fields to adhere to the vocabularies specified for the various vocabularied fields. +These vocabularies are merely provided as a means of assistance in developing consistent language +within the database. New terms may be added to more appropriately describe your data that goes +beyond the current list. + +Table 2: Suggested values for controlled vocabulary fields in the template. Each term has a preferred label with a definition to aid understanding +of its meaning. For some terms, alternative +labels with similar semantics are provided. +
**Note:** The values for `geodeticDatum` and `relationshipToRelatedSite` must come from one of the Preferred labels or Alternate Labels in this +table. + +{{tables.vocabularies}} + +### APPENDIX-II: Well Known Text (WKT) +General information on how WKT coordinate reference data is formatted is available [here](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry). +The length of a WKT string or of its components is not prescribed; however, MS Excel *does* have a +32,767 (32K) character limit on individual cells in a spreadsheet. + +It is possible to edit CSV files outside of Excel in order to include more than 32K characters. + +![Multipart geometries (2D) WKT](assets/multipart_geometries_2d_wkt.png) +
*Source: Mwtoews - CC BY-SA 3.0 - Wikipedia *
+ +### APPENDIX-III: Timestamp +Following date and date-time formats are acceptable within the timestamp: + +| TYPE | FORMAT | +| --- |-------------------------------------------------------------------------------------------------------------------------------------| +| **xsd:dateTimeStamp with timezone** | yyyy-mm-ddThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00) OR
yyyy-mm-ddThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00) OR
yyyy-mm-ddThh:mmTZD (eg 1997-07-16T19:20+01:00)| +| **xsd:dateTime** | yyyy-mm-ddThh:mm:ss.s (eg 1997-07-16T19:20:30.45) OR
yyyy-mm-ddThh:mm:ss (eg 1997-07-16T19:20:30) OR
yyyy-mm-ddThh:mm (eg 1997-07-16T19:20) | +| **xsd:Date** | dd/mm/yyyy OR
d/m/yyyy OR
yyyy-mm-dd OR
yyyy-m-d | +| **xsd:gYearMonth** | mm/yyyy OR
m/yyyy OR
yyyy-mm | +| **xsd:gYear** | yyyy | + +Where:
+  `yyyy`: four-digit year
+  `mm`: two-digit month (01=January, etc.)
+  `dd`: two-digit day of month (01 through 31)
+  `hh`: two digits of hour (00 through 23) (am/pm NOT allowed)
+  `mm`: two digits of minute (00 through 59)
+  `ss`: two digits of second (00 through 59)
+ + +### APPENDIX-IV: UTF-8 +UTF-8 encoding is considered a best practice for handling character encoding, especially in +the context of web development, data exchange, and modern software systems. UTF-8 +(Unicode Transformation Format, 8-bit) is a variable-width character encoding capable of +encoding all possible characters (code points) in Unicode.
+Here are some reasons why UTF-8 is recommended: +- **Universal Character Support:** UTF-8 can represent almost all characters from all writing + systems in use today. This includes characters from various languages, mathematical symbols, + and other special characters. +- **Backward Compatibility:** UTF-8 is backward compatible with ASCII (American + Standard Code for Information Interchange). The first 128 characters in UTF-8 are + identical to ASCII, making it easy to work with systems that use ASCII. +- **Efficiency:** UTF-8 is space-efficient for Latin-script characters (common in English + and many other languages). It uses one byte for ASCII characters and up to four + bytes for other characters. This variable-length encoding minimises storage and + bandwidth requirements. +- **Web Standards:** UTF-8 is the dominant character encoding for web content. It is + widely supported by browsers, servers, and web-related technologies. +- **Globalisation:** As software applications become more globalised, supporting a wide + range of languages and scripts becomes crucial. UTF-8 is well-suited for + internationalisation and multilingual support. +- **Compatibility with Modern Systems:** UTF-8 is the default encoding for many + programming languages, databases, and operating systems. Choosing UTF-8 helps + ensure compatibility across different platforms and technologies. + +When working with text data, UTF-8 encoding is recommended to avoid issues related to character +representation and ensure that a diverse set of characters and languages is supported. 
+ +For assistance, please contact: +{% endblock %} diff --git a/abis_mapping/templates/survey_site_data_v3/validators/shaping.py b/abis_mapping/templates/survey_site_data_v3/validators/shaping.py new file mode 100644 index 00000000..864a9608 --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/validators/shaping.py @@ -0,0 +1,288 @@ +"""Generates a SHACL shape graph for the survey_site_data template v3.""" + +# Third-party +import rdflib +import rdflib.term +import rdflib.collection +import pyshacl + +# Local +from abis_mapping import utils +from abis_mapping import vocabs + + +# Constants +a = rdflib.RDF.type +SH = rdflib.Namespace("http://www.w3.org/ns/shacl#") +TEMPORAL_DATATYPES = [ + (rdflib.TIME.inXSDDate, rdflib.XSD.date), + (rdflib.TIME.inXSDDateTime, rdflib.XSD.dateTime), + (rdflib.TIME.inXSDDateTimeStamp, rdflib.XSD.dateTimeStamp), + (rdflib.TIME.inXSDgYearMonth, rdflib.XSD.gYearMonth), + (rdflib.TIME.inXSDgYear, rdflib.XSD.gYear), +] + + +def main() -> None: + """Implementation of the SHACL generation.""" + + # Create graph and bind shape namespace + g = utils.rdf.create_graph() + g.bind("sh", SH) + + # Call each of the major shape constructions + dataset_shape = create_dataset_shape(g) + add_site_shape(g, dataset_shape) + add_site_visit_shape(g, dataset_shape) + + # Perform a validation + d = rdflib.Graph().parse(source="abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl") + s = rdflib.Graph().parse(data=g.serialize()) + valid, rgraph, rtext = pyshacl.validate(data_graph=d, shacl_graph=s) + + # Print report to stdout. + print(rtext) + + # Raise exception if not valid. + if not valid: + raise AssertionError("not valid") + + # Write out to file + g.serialize("abis_mapping/templates/survey_site_data_v3/validators/validator.ttl") + + +def add_site_visit_shape(graph: rdflib.Graph, dataset_shape: rdflib.term.Node) -> rdflib.term.Node: + """Defines and adds site visit shape to graph. 
+ + Args: + graph (rdflib.Graph): Graph to add site visit + dataset_shape (rdflib.term.Node): Dataset reference node: + + Returns: + rdflib.term.Node: Site visit shape node reference + """ + + # Declare shape uri + site_visit_shape = utils.namespaces.BDR.SiteVisitShape + + # Add type and target class + graph.add((site_visit_shape, a, SH.NodeShape)) + graph.add((site_visit_shape, SH.targetClass, utils.namespaces.TERN.SiteVisit)) + + # Add dataset prop + graph.add((site_visit_shape, SH.property, dataset_shape)) + + # Add temporal entity prop + temporal_entity_prop = rdflib.BNode() + graph.add((temporal_entity_prop, SH["class"], rdflib.TIME.TemporalEntity)) + graph.add((temporal_entity_prop, SH.path, rdflib.TIME.hasTime)) + # # Add dates + begin_prop = rdflib.BNode() + graph.add((begin_prop, SH["class"], rdflib.TIME.Instant)) + graph.add((begin_prop, SH.path, rdflib.TIME.hasBeginning)) + temporal_type_opts = temporal_type_list(graph) + graph.add((begin_prop, SH["or"], temporal_type_opts)) + graph.add((temporal_entity_prop, SH.property, begin_prop)) + end_prop = rdflib.BNode() + graph.add((end_prop, SH["class"], rdflib.TIME.Instant)) + graph.add((end_prop, SH.path, rdflib.TIME.hasEnd)) + graph.add((end_prop, SH["or"], temporal_type_opts)) + graph.add((temporal_entity_prop, SH.property, end_prop)) + graph.add((site_visit_shape, SH.property, temporal_entity_prop)) + + # Return reference uri + return site_visit_shape + + +def temporal_type_list(graph: rdflib.Graph) -> rdflib.term.Node: + """Creates an rdf list of temporal types. 
+ + Args: + graph (rdflib.Graph): Graph the list will be added: + + Returns: + rdflib.term.Node: Reference to the list + """ + # Declare temporal type list uri + temporal_type_opts = utils.namespaces.BDR.TemporalTypesList + + # Empty list to hold the temporal datatypes + temporal_type_nodes: list[rdflib.term.Node] = [] + for in_xsd, data_type in TEMPORAL_DATATYPES: + new_node = rdflib.BNode() + graph.add((new_node, SH.path, in_xsd)) + graph.add((new_node, SH.datatype, data_type)) + temporal_type_nodes.append(new_node) + + # Create a collection and add to the graph + rdflib.collection.Collection(graph, temporal_type_opts, temporal_type_nodes) + + # Return the top reference uri + return temporal_type_opts + + +def create_dataset_shape(graph: rdflib.Graph) -> rdflib.term.Node: + """Creates the dataset shape and adds to graph. + + Args: + graph (rdflib.Graph): Graph to be added to. + + Returns: + rdflib.term.Node: Reference to the dataset shape. + """ + # Add the dataset prop + dataset_prop = utils.namespaces.BDR.DatasetShape + graph.add((dataset_prop, a, SH.PropertyShape)) + graph.add((dataset_prop, SH.path, rdflib.VOID.inDataset)) + graph.add((dataset_prop, SH["class"], utils.namespaces.TERN.Dataset)) + graph.add((dataset_prop, SH.minCount, rdflib.Literal(1))) + graph.add((dataset_prop, SH.maxCount, rdflib.Literal(1))) + + # Return reference uri + return dataset_prop + + +def add_site_shape(g: rdflib.Graph, dataset_shape: rdflib.term.Node) -> rdflib.term.Node: + """Adds the site shape to the graph. + + Args: + g (rdflib.Graph): Graph to be modified. + dataset_shape (rdflib.term.Node): Reference to the dataset shape + + Returns: + rdflib.term.Node: Reference to the site shape. 
+ + """ + # Declare the shape uri + site_shape = utils.namespaces.BDR.SiteShape + + # Set the target class + g.add((site_shape, a, SH.NodeShape)) + g.add((site_shape, SH.targetClass, utils.namespaces.TERN.Site)) + + # Add the dataset prop + g.add((site_shape, SH.property, dataset_shape)) + + # Add the site visit prop + site_visit_prop = rdflib.BNode() + g.add((site_visit_prop, SH["class"], utils.namespaces.TERN.SiteVisit)) + g.add((site_visit_prop, SH.path, utils.namespaces.TERN.hasSiteVisit)) + g.add((site_visit_prop, SH.minCount, rdflib.Literal(1))) + g.add((site_visit_prop, SH.maxCount, rdflib.Literal(1))) + g.add((site_shape, SH.property, site_visit_prop)) + + # Add the site id property + site_id_prop = rdflib.BNode() + g.add((site_id_prop, SH.path, rdflib.SDO.identifier)) + g.add((site_id_prop, SH.datatype, rdflib.XSD.string)) + g.add((site_id_prop, SH.minCount, rdflib.Literal(1))) + g.add((site_id_prop, SH.maxCount, rdflib.Literal(1))) + g.add((site_shape, SH.property, site_id_prop)) + + # Add the feature type prop + feature_type_prop = rdflib.BNode() + g.add((feature_type_prop, SH.path, utils.namespaces.TERN.featureType)) + g.add((feature_type_prop, SH.hasValue, vocabs.site_type.SITE.iri)) + g.add((feature_type_prop, SH.minCount, rdflib.Literal(1))) + g.add((feature_type_prop, SH.maxCount, rdflib.Literal(1))) + g.add((site_shape, SH.property, feature_type_prop)) + + # Add the dcterms type prop + dcterms_type_prop = rdflib.BNode() + terms_list = rdflib.BNode() + terms_list_shape = rdflib.BNode() + dcterms_type_opts = rdflib.BNode() + g.add((dcterms_type_prop, SH.path, rdflib.SDO.additionalType)) + site_type_vocab_values: list[rdflib.term.Node] = [v.iri for v in vocabs.site_type.SiteType.terms if v is not None] + rdflib.collection.Collection(g, terms_list, site_type_vocab_values) + g.add((terms_list_shape, SH["in"], terms_list)) + site_type_concept_shape = site_type_concept(g) + rdflib.collection.Collection(g, dcterms_type_opts, [terms_list_shape, 
site_type_concept_shape]) + g.add((dcterms_type_prop, SH.xone, dcterms_type_opts)) + g.add((dcterms_type_prop, SH.minCount, rdflib.Literal(1))) + g.add((dcterms_type_prop, SH.maxCount, rdflib.Literal(1))) + g.add((site_shape, SH.property, dcterms_type_prop)) + + # Add SDO name prop + sdo_name_prop = rdflib.BNode() + g.add((sdo_name_prop, SH.path, rdflib.SDO.name)) + g.add((sdo_name_prop, SH.datatype, rdflib.XSD.string)) + g.add((sdo_name_prop, SH.maxCount, rdflib.Literal(1))) + g.add((site_shape, SH.property, sdo_name_prop)) + + # Add SDO description prop + sdo_description_prop = rdflib.BNode() + g.add((sdo_description_prop, SH.path, rdflib.SDO.description)) + g.add((sdo_description_prop, SH.datatype, rdflib.XSD.string)) + g.add((sdo_description_prop, SH.maxCount, rdflib.Literal(1))) + g.add((site_shape, SH.property, sdo_description_prop)) + + # Add coord uncertainty prop + coord_uncert_prop = rdflib.BNode() + g.add((coord_uncert_prop, SH.path, utils.namespaces.GEO.hasMetricSpatialAccuracy)) + g.add((coord_uncert_prop, SH.datatype, rdflib.XSD.double)) + g.add((coord_uncert_prop, SH.maxCount, rdflib.Literal(1))) + g.add((site_shape, SH.property, coord_uncert_prop)) + + # Add geometry prop + site_geometry_prop = rdflib.BNode() + g.add((site_geometry_prop, SH.path, utils.namespaces.GEO.hasGeometry)) + g.add((site_geometry_prop, SH["class"], utils.namespaces.GEO.Geometry)) + g.add((site_geometry_prop, SH.maxCount, rdflib.Literal(2))) # Both point and footprintWKT supplied + wkt_prop = rdflib.BNode() + g.add((wkt_prop, SH.path, utils.namespaces.GEO.asWKT)) + g.add((wkt_prop, SH.datatype, utils.namespaces.GEO.wktLiteral)) + g.add((wkt_prop, SH.minCount, rdflib.Literal(1))) + g.add((wkt_prop, SH.minCount, rdflib.Literal(1))) + g.add((site_geometry_prop, SH.property, wkt_prop)) + g.add((site_shape, SH.property, site_geometry_prop)) + + # Return ref to site shape + return site_shape + + +def site_type_concept(g: rdflib.Graph) -> rdflib.term.Node: + """Site type concept 
shape. + + Args: + g (rdflib.Graph): Graph to be added to. + + Returns: + rdflib.term.Node: Site type concept shape reference. + + """ + # Add SKOS concept shapetype + concept_shape = utils.namespaces.BDR.SiteTypeConceptShape + g.add((concept_shape, a, SH.NodeShape)) + g.add((concept_shape, SH.targetClass, rdflib.SKOS.Concept)) + + # Add definition prop + definition_prop = rdflib.BNode() + g.add((definition_prop, SH.path, rdflib.SKOS.definition)) + g.add((definition_prop, SH.hasValue, rdflib.Literal("A type of site."))) + g.add((concept_shape, SH.property, definition_prop)) + + # Add scheme + scheme_prop = rdflib.BNode() + g.add((scheme_prop, SH.path, rdflib.SKOS.inScheme)) + g.add( + ( + scheme_prop, + SH.hasValue, + rdflib.URIRef("http://linked.data.gov.au/def/tern-cv/74aa68d3-28fd-468d-8ff5-7e791d9f7159"), + ) + ) + g.add((concept_shape, SH.property, scheme_prop)) + + # Add preflabel + pref_label_prop = rdflib.BNode() + g.add((pref_label_prop, SH.path, rdflib.SKOS.prefLabel)) + g.add((pref_label_prop, SH.datatype, rdflib.XSD.string)) + g.add((concept_shape, SH.property, pref_label_prop)) + + # Return reference + return concept_shape + + +if __name__ == "__main__": + main() diff --git a/abis_mapping/templates/survey_site_data_v3/validators/validator.ttl b/abis_mapping/templates/survey_site_data_v3/validators/validator.ttl new file mode 100644 index 00000000..99f74f9c --- /dev/null +++ b/abis_mapping/templates/survey_site_data_v3/validators/validator.ttl @@ -0,0 +1,81 @@ +@prefix bdr: . +@prefix geo: . +@prefix rdf: . +@prefix schema: . +@prefix sh: . +@prefix skos: . +@prefix tern: . +@prefix time: . +@prefix void: . +@prefix xsd: . 
+ +bdr:SiteShape a sh:NodeShape ; + sh:property [ sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:minCount 1 ; + sh:path schema:identifier ], + [ sh:class geo:Geometry ; + sh:maxCount 2 ; + sh:path geo:hasGeometry ; + sh:property [ sh:datatype geo:wktLiteral ; + sh:minCount 1 ; + sh:path geo:asWKT ] ], + [ sh:hasValue ; + sh:maxCount 1 ; + sh:minCount 1 ; + sh:path tern:featureType ], + [ sh:maxCount 1 ; + sh:minCount 1 ; + sh:path schema:additionalType ; + sh:xone ( [ sh:in ( ) ] bdr:SiteTypeConceptShape ) ], + [ sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:path schema:description ], + [ sh:class tern:SiteVisit ; + sh:maxCount 1 ; + sh:minCount 1 ; + sh:path tern:hasSiteVisit ], + [ sh:datatype xsd:string ; + sh:maxCount 1 ; + sh:path schema:name ], + [ sh:datatype xsd:double ; + sh:maxCount 1 ; + sh:path geo:hasMetricSpatialAccuracy ], + bdr:DatasetShape ; + sh:targetClass tern:Site . + +bdr:SiteVisitShape a sh:NodeShape ; + sh:property [ sh:class time:TemporalEntity ; + sh:path time:hasTime ; + sh:property [ sh:class time:Instant ; + sh:or bdr:TemporalTypesList ; + sh:path time:hasEnd ], + [ sh:class time:Instant ; + sh:or bdr:TemporalTypesList ; + sh:path time:hasBeginning ] ], + bdr:DatasetShape ; + sh:targetClass tern:SiteVisit . + +bdr:SiteTypeConceptShape a sh:NodeShape ; + sh:property [ sh:datatype xsd:string ; + sh:path skos:prefLabel ], + [ sh:hasValue "A type of site." ; + sh:path skos:definition ], + [ sh:hasValue ; + sh:path skos:inScheme ] ; + sh:targetClass skos:Concept . + +bdr:DatasetShape a sh:PropertyShape ; + sh:class tern:Dataset ; + sh:maxCount 1 ; + sh:minCount 1 ; + sh:path void:inDataset . + +bdr:TemporalTypesList rdf:first [ sh:datatype xsd:date ; + sh:path time:inXSDDate ] ; + rdf:rest ( [ sh:datatype xsd:dateTime ; + sh:path time:inXSDDateTime ] [ sh:datatype xsd:dateTimeStamp ; + sh:path time:inXSDDateTimeStamp ] [ sh:datatype xsd:gYearMonth ; + sh:path time:inXSDgYearMonth ] [ sh:datatype xsd:gYear ; + sh:path time:inXSDgYear ] ) . 
+ diff --git a/abis_mapping/templates/survey_site_visit_data_v3/README.md b/abis_mapping/templates/survey_site_visit_data_v3/README.md new file mode 100644 index 00000000..d2595b7c --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/README.md @@ -0,0 +1,5 @@ +# Template Description +A template to translate some Darwin Core fields + +# Template Instructions +See `instructions.pdf` for more details diff --git a/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal-error-dates-wrong-order.csv b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal-error-dates-wrong-order.csv new file mode 100644 index 00000000..368fd74c --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal-error-dates-wrong-order.csv @@ -0,0 +1,2 @@ +surveyID,siteID,siteIDSource,siteVisitID,siteVisitStart,siteVisitEnd,visitOrgs,visitObservers,condition,targetTaxonomicScope,protocolName,protocolDescription,samplingEffortValue,samplingEffortUnit +S1,PLOT1,GAIA,VA-99,2024-10-01,2024-09-22,GAIA,John Smith,Burnt,Coleoptera,harpTrapping,Three conventional harp traps,20 x 12,trapDays diff --git a/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal-error-no-dates.csv b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal-error-no-dates.csv new file mode 100644 index 00000000..e7fb3438 --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal-error-no-dates.csv @@ -0,0 +1,2 @@ +surveyID,siteID,siteIDSource,siteVisitID,siteVisitStart,siteVisitEnd,visitOrgs,visitObservers,condition,targetTaxonomicScope,protocolName,protocolDescription,samplingEffortValue,samplingEffortUnit +S1,PLOT1,GAIA,VA-99,,,GAIA,John Smith,Burnt,Coleoptera,harpTrapping,Three conventional harp traps,20 x 12,trapDays diff --git a/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv new file mode 100644 index 00000000..f0ecd031 
--- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv @@ -0,0 +1,4 @@ +surveyID,siteID,siteIDSource,siteVisitID,siteVisitStart,siteVisitEnd,visitOrgs,visitObservers,condition,targetTaxonomicScope,protocolName,protocolDescription,samplingEffortValue,samplingEffortUnit +TIS-24-03,P1,WAM,TIS-24-03-P1-01,2024-03-12,2024-04-04,WAM | DBCA,ORCID00001 | ORCID00002,dry,new_taxon,wet pitfall trap,10 x square buckets of size 20 x 20 x 15 cm. Propylene glycol.,240,trap nights +TIS-24-03,P1,WAM,TIS-24-03-P1-02,2024-03-12,2024-03-12,WAM,ORCID00001,moist leaf litter after recent rain,invertebrate,litter sifting,50 cm diameter sifter with 5 mm mesh. Litter samles taken ~1 metre from each pitfall trap,10,sifts +TIS-24-03,P1,WAM,TIS-24-03-P1-03,2024-03-12,,WAM,ORCID00003,,bird,human observation,,, diff --git a/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.ttl b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.ttl new file mode 100644 index 00000000..af9370cc --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.ttl @@ -0,0 +1,208 @@ +@prefix prov: . +@prefix rdf: . +@prefix rdfs: . +@prefix schema: . +@prefix skos: . +@prefix sosa: . +@prefix tern: . +@prefix time: . +@prefix xsd: . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Site Visit Collection - Sampling Effort - 10 sifts" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Site Visit Collection - Sampling Effort - 240 trap nights" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Site Visit Collection - Target Taxonomic Scope - bird" ; + tern:hasAttribute . + + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Site Visit Collection - Target Taxonomic Scope - invertebrate" ; + tern:hasAttribute . 
+ + a schema:Collection ; + schema:isPartOf ; + schema:member ; + schema:name "Site Visit Collection - Target Taxonomic Scope - new_taxon" ; + tern:hasAttribute . + + a rdfs:Datatype ; + skos:definition "An identifier for the site" ; + skos:prefLabel "WAM Site ID" ; + prov:qualifiedAttribution . + + a tern:SiteVisit ; + time:hasTime [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2024-03-12"^^xsd:date ] ] ; + prov:hadPlan ; + prov:wasAssociatedWith , + ; + schema:identifier "TIS-24-03-P1-03" ; + schema:isPartOf , + ; + tern:hasSite . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "10 sifts" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "240 trap nights" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "bird" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "invertebrate" ; + tern:hasValue . + + a tern:Attribute ; + schema:isPartOf ; + tern:attribute ; + tern:hasSimpleValue "new_taxon" ; + tern:hasValue . + + a prov:Attribution ; + prov:agent ; + prov:hadRole . + + a skos:Concept ; + skos:broader ; + skos:definition "A type of protocolName" ; + skos:inScheme ; + skos:prefLabel "litter sifting" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a skos:Concept ; + skos:definition "In conjunction with the sampling effort value, the sampling effort unit gives an indication of the effort applied to the specified protocol." ; + skos:inScheme ; + skos:prefLabel "sifts" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . 
+ + a skos:Concept ; + skos:broader ; + skos:definition "A type of targetTaxonomicScope" ; + skos:inScheme ; + skos:prefLabel "new_taxon" ; + schema:citation "https://linked.data.gov.au/dataset/bdr/00000000-0000-0000-0000-000000000000"^^xsd:anyURI . + + a tern:Float, + tern:Value ; + rdf:value "10.0"^^xsd:float ; + tern:unit . + + a tern:Float, + tern:Value ; + rdf:value "240.0"^^xsd:float ; + tern:unit . + + a tern:IRI, + tern:Value ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdf:value . + + a tern:IRI, + tern:Value ; + rdf:value . + + a prov:Plan ; + sosa:usedProcedure ; + schema:description "10 x square buckets of size 20 x 20 x 15 cm. Propylene glycol." ; + schema:isPartOf . + + a prov:Plan ; + sosa:usedProcedure ; + schema:description "50 cm diameter sifter with 5 mm mesh. Litter samles taken ~1 metre from each pitfall trap" ; + schema:isPartOf . + + a prov:Plan ; + sosa:usedProcedure ; + schema:isPartOf . + + a prov:Agent, + prov:Organization ; + schema:name "DBCA" . + + a prov:Agent, + prov:Person ; + schema:name "ORCID00002" . + + a prov:Agent, + prov:Person ; + schema:name "ORCID00003" . + + a tern:SiteVisit ; + time:hasTime [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2024-03-12"^^xsd:date ] ; + time:hasEnd [ a time:Instant ; + time:inXSDDate "2024-04-04"^^xsd:date ] ] ; + prov:hadPlan ; + prov:wasAssociatedWith , + , + , + ; + schema:identifier "TIS-24-03-P1-01" ; + schema:isPartOf , + ; + tern:hasSite ; + tern:siteDescription "dry" . + + a tern:SiteVisit ; + time:hasTime [ a time:TemporalEntity ; + time:hasBeginning [ a time:Instant ; + time:inXSDDate "2024-03-12"^^xsd:date ] ; + time:hasEnd [ a time:Instant ; + time:inXSDDate "2024-03-12"^^xsd:date ] ] ; + prov:hadPlan ; + prov:wasAssociatedWith , + ; + schema:identifier "TIS-24-03-P1-02" ; + schema:isPartOf , + ; + tern:hasSite ; + tern:siteDescription "moist leaf litter after recent rain" . + + a prov:Agent, + prov:Person ; + schema:name "ORCID00001" . 
+ + a tern:Site ; + schema:identifier "P1"^^ ; + schema:isPartOf . + + a tern:Survey ; + schema:isPartOf . + + a prov:Agent, + prov:Organization ; + schema:name "WAM" . + + a tern:Dataset . + diff --git a/abis_mapping/templates/survey_site_visit_data_v3/mapping.py b/abis_mapping/templates/survey_site_visit_data_v3/mapping.py new file mode 100644 index 00000000..8a074677 --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/mapping.py @@ -0,0 +1,955 @@ +"""Provides ABIS Mapper for `survey_site_visit_data-v3.0.0` template.""" + +# Standard +import dataclasses + +# Third-party +import frictionless +import rdflib + +# Local +from abis_mapping import base +from abis_mapping import plugins +from abis_mapping import models +from abis_mapping import utils + +# Typing +from typing import Any + + +# Constants / shortcuts +a = rdflib.RDF.type +DATA_ROLE_RESOURCE_PROVIDER = rdflib.URIRef("https://linked.data.gov.au/def/data-roles/resourceProvider") +CONCEPT_TARGET_TAXONOMIC_SCOPE = rdflib.URIRef( + "https://linked.data.gov.au/def/nrm/7ea12fed-6b87-4c20-9ab4-600b32ce15ec", +) +CONCEPT_SAMPLING_EFFORT = utils.rdf.uri("concept/samplingEffort", utils.namespaces.EXAMPLE) + + +@dataclasses.dataclass +class Agent: + """Contains data items to enable producing agent nodes""" + + row_value: str + uri: rdflib.URIRef + + +class SurveySiteVisitMapper(base.mapper.ABISMapper): + """ABIS mapper for the v3 survey site visit data csv template.""" + + def apply_validation( + self, + data: base.types.ReadableType, + **kwargs: Any, + ) -> frictionless.Report: + """Applies Frictionless Validation for the csv Template + + Args: + data (base.types.ReadableType): Raw data to be validated. + **kwargs (Any): Additional keyword arguments. + + Returns: + frictionless.Report: Validation report for the specified data. 
+ """ + # Construct schema + schema = self.extra_fields_schema(data=data, full_schema=True) + + # Construct resource + resource_site_visit_data = frictionless.Resource( + source=data, + format="csv", + schema=schema, + encoding="utf-8", + ) + + # Base extra custom checks + checks = [ + plugins.tabular.IsTabular(), + plugins.empty.NotEmpty(), + plugins.chronological.ChronologicalOrder( + field_names=["siteVisitStart", "siteVisitEnd"], + ), + plugins.mutual_inclusion.MutuallyInclusive( + field_names=["samplingEffortValue", "samplingEffortUnit"], + ), + ] + + # Validate the site visit resource + report: frictionless.Report = resource_site_visit_data.validate( + checklist=frictionless.Checklist( + checks=checks, + ), + ) + + # Return validation report + return report + + def extract_site_visit_id_to_site_id_map( + self, + data: base.types.ReadableType, + ) -> dict[str, str]: + """Constructs a dictionary mapping site visit id to site id. + + Args: + data: Raw data to be mapped. + + Returns: + Map with site visit id for keys and site id for values. + """ + # Construct schema + schema = frictionless.Schema.from_descriptor(self.schema()) + + # Construct resource + resource = frictionless.Resource(source=data, format="csv", schema=schema, encoding="utf-8") + + # Declare result reference + result: dict[str, str] = {} + + # Context manager for row streaming + with resource.open() as r: + for row in r.row_stream: + # Check that the cells have values and add to map + if (svid := row["siteVisitID"]) is not None and (sid := row["siteID"]) is not None: + result[svid] = sid + + # Return + return result + + def extract_temporal_defaults( + self, + data: base.types.ReadableType, + ) -> dict[str, str]: + """Constructs a dictionary mapping site visit id to default temporal entity. + + The default temporal entity value will contain serialized RDF as turtle. + + Args: + data (base.types.ReadableType): Raw data to be mapped. 
+ + Returns: + dict[str, str]: Keys are the site visit id, values are the serialized + RDF (turtle) containing the default temporal entity. + """ + # Construct schema + schema = frictionless.Schema.from_descriptor(self.schema()) + + # Construct resource + resource = frictionless.Resource(source=data, format="csv", schema=schema, encoding="utf-8") + + # Create empty dictionary to hold map + result: dict[str, str] = {} + + # Context manager for row streaming + with resource.open() as r: + for row in r.row_stream: + # Extract values from row. + start_date: models.temporal.Timestamp | None = row["siteVisitStart"] + end_date: models.temporal.Timestamp | None = row["siteVisitEnd"] + site_visit_id: str | None = row["siteVisitID"] + + # Check for siteVisitID, even though siteVisitID is a mandatory field, it can be missing here + # because this method is called for cross-validation, regardless of if this template is valid. + if not site_visit_id: + continue + + # Temporal flexibility is dependent upon a start_date being present only. + # Again, even though siteVisitStart is a mandatory field, it can be None here + # because this method is called for cross-validation, regardless of if this template is valid. + if not start_date: + continue + + # Create new graph + graph = rdflib.Graph() + + self.add_temporal_coverage_bnode( + graph=graph, + start_date=start_date, + end_date=end_date, + ) + + # Add serialize rdf as turtle to result map + result[site_visit_id] = graph.serialize(format="turtle") + + return result + + def add_temporal_coverage_bnode( + self, + *, + graph: rdflib.Graph, + start_date: models.temporal.Timestamp, + end_date: models.temporal.Timestamp | None, + ) -> None: + """Creates and adds to graph, temporal coverage blank node. + + Args: + start_date: start date. + end_date: Optional end date. + graph: Graph to add to. 
+ """ + # Create temporal coverage node + temporal_coverage = rdflib.BNode() + graph.add((temporal_coverage, a, rdflib.TIME.TemporalEntity)) + begin = rdflib.BNode() + graph.add((temporal_coverage, rdflib.TIME.hasBeginning, begin)) + graph.add((begin, a, rdflib.TIME.Instant)) + graph.add((begin, start_date.rdf_in_xsd, start_date.to_rdf_literal())) + if end_date is not None: + end = rdflib.BNode() + graph.add((temporal_coverage, rdflib.TIME.hasEnd, end)) + graph.add((end, a, rdflib.TIME.Instant)) + graph.add((end, end_date.rdf_in_xsd, end_date.to_rdf_literal())) + + def apply_mapping_row( + self, + *, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + extra_schema: frictionless.Schema, + base_iri: rdflib.Namespace, + **kwargs: Any, + ) -> None: + """Applies mapping for a row in the Survey Site Visit Data template. + + Args: + row: Row to be processed in the dataset. + dataset: Dataset IRI this row is a part of. + graph: Graph to map row into. + extra_schema: Schema of extra fields. + base_iri: Base IRI to use for mapping. + """ + # variables starting with row_ are values from the row. + # variables starting with uri_ are constructed URIs. + + row_site_visit_id: str | None = row["siteVisitID"] + row_site_id: str | None = row["siteID"] + # should always have these mandatory fields, skip if not + if not row_site_visit_id: + return + if not row_site_id: + return + + # Part 1: Construct URIs from Row + + # Create TERN.SiteVisit subject IRI - Note this needs to match the iri construction of the + # survey occurrence template mapping, ensuring they will resolve properly. + uri_site_visit_activity = utils.iri_patterns.site_visit_iri(base_iri, row_site_visit_id) + + # TERN.Site subject IRI - Note this needs to match the iri construction of the + # survey site and occurrence template mapping, ensuring they will resolve properly. 
+ uri_site = utils.iri_patterns.site_iri(base_iri, row_site_id) + + # Create TERN survey IRI from surveyID field + row_survey_id: str | None = row["surveyID"] + uri_survey = utils.iri_patterns.survey_iri(base_iri, row_survey_id) + + # URI for the Site Visit Plan + uri_site_visit_plan = utils.iri_patterns.plan_iri(base_iri, "visit", row_site_visit_id) + + # URIs based on the siteIDSource + row_site_id_source: str | None = row["siteIDSource"] + if row_site_id_source: + uri_site_id_datatype = utils.iri_patterns.datatype_iri("siteID", row_site_id_source) + uri_site_id_datatype_attribution = utils.iri_patterns.attribution_iri( + base_iri, "resourceProvider", row_site_id_source + ) + uri_site_id_datatype_agent = utils.iri_patterns.agent_iri(row_site_id_source) + else: + uri_site_id_datatype = None + uri_site_id_datatype_attribution = None + uri_site_id_datatype_agent = None + + # Create Agents for each visit Org + row_visit_orgs: list[str] | None = row["visitOrgs"] + visit_org_agents: list[Agent] + if row_visit_orgs: + visit_org_agents = [ + Agent( + row_value=visit_org, + uri=utils.iri_patterns.agent_iri(visit_org), + ) + for visit_org in row_visit_orgs + ] + else: + visit_org_agents = [] + + # Create Agents for each visit Observer + row_visit_observers: list[str] | None = row["visitObservers"] + visit_observer_agents: list[Agent] + if row_visit_observers: + visit_observer_agents = [ + Agent( + row_value=visit_observer, + uri=utils.iri_patterns.agent_iri(visit_observer), + ) + for visit_observer in row_visit_observers + ] + else: + visit_observer_agents = [] + + # Conditionally create Attribute and Value for targetTaxonomicScope + row_target_taxonomic_scope: str | None = row["targetTaxonomicScope"] + if row_target_taxonomic_scope: + uri_target_taxonomic_scope_attribute = utils.iri_patterns.attribute_iri( + base_iri, "targetTaxonomicScope", row_target_taxonomic_scope + ) + uri_target_taxonomic_scope_value = utils.iri_patterns.attribute_value_iri( + base_iri, 
"targetTaxonomicScope", row_target_taxonomic_scope + ) + uri_target_taxonomic_scope_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "SiteVisit", "targetTaxonomicScope", row_target_taxonomic_scope + ) + else: + uri_target_taxonomic_scope_attribute = None + uri_target_taxonomic_scope_value = None + uri_target_taxonomic_scope_collection = None + + # Conditionally create Attribute and Value for samplingEffort + row_sampling_effort_value: str | None = row["samplingEffortValue"] + row_sampling_effort_unit: str | None = row["samplingEffortUnit"] + if row_sampling_effort_value and row_sampling_effort_unit: + row_sampling_effort = f"{row_sampling_effort_value} {row_sampling_effort_unit}" + uri_sampling_effort_attribute = utils.iri_patterns.attribute_iri( + base_iri, "samplingEffort", row_sampling_effort + ) + uri_sampling_effort_value = utils.iri_patterns.attribute_value_iri( + base_iri, "samplingEffort", row_sampling_effort + ) + uri_sampling_effort_collection = utils.iri_patterns.attribute_collection_iri( + base_iri, "SiteVisit", "samplingEffort", row_sampling_effort + ) + else: + row_sampling_effort = None + uri_sampling_effort_attribute = None + uri_sampling_effort_value = None + uri_sampling_effort_collection = None + + # Part 2: Construct mapping from row data and URIs + + # Add Site Visit Activity + self.add_site_visit_activity( + uri=uri_site_visit_activity, + row_site_visit_id=row_site_visit_id, + uri_survey=uri_survey, + uri_site=uri_site, + uri_site_visit_plan=uri_site_visit_plan, + visit_org_agents=visit_org_agents, + visit_observer_agents=visit_observer_agents, + row=row, + dataset=dataset, + graph=graph, + ) + + # Add survey + self.add_survey( + uri=uri_survey, + dataset=dataset, + graph=graph, + ) + + # Add site + self.add_site( + uri=uri_site, + uri_site_id_datatype=uri_site_id_datatype, + dataset=dataset, + row=row, + graph=graph, + ) + + # Add site id datatype, attribution and agent + self.add_site_id_datatype( + 
uri=uri_site_id_datatype, + row_site_id_source=row_site_id_source, + uri_site_id_datatype_attribution=uri_site_id_datatype_attribution, + graph=graph, + ) + self.add_site_id_datatype_attribution( + uri=uri_site_id_datatype_attribution, + uri_site_id_datatype_agent=uri_site_id_datatype_agent, + graph=graph, + ) + self.add_site_id_datatype_agent( + uri=uri_site_id_datatype_agent, + row_site_id_source=row_site_id_source, + graph=graph, + ) + + # Add visitOrgs Agents + for visit_org_agent in visit_org_agents: + self.add_visit_org_agent( + uri=visit_org_agent.uri, + row_visit_org=visit_org_agent.row_value, + graph=graph, + ) + + # Add visitObservers Agents + for visit_observer_agent in visit_observer_agents: + self.add_visit_observer_agent( + uri=visit_observer_agent.uri, + row_visit_observer=visit_observer_agent.row_value, + graph=graph, + ) + + # Add site visit plan + self.add_site_visit_plan( + uri=uri_site_visit_plan, + row=row, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + + # Add targetTaxonomicScope Attribute, Value and Collection + self.add_target_taxonomic_scope_attribute( + uri=uri_target_taxonomic_scope_attribute, + row_target_taxonomic_scope=row_target_taxonomic_scope, + uri_target_taxonomic_scope_value=uri_target_taxonomic_scope_value, + dataset=dataset, + graph=graph, + ) + self.add_target_taxonomic_scope_value( + uri=uri_target_taxonomic_scope_value, + row_target_taxonomic_scope=row_target_taxonomic_scope, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + self.add_target_taxonomic_scope_collection( + uri=uri_target_taxonomic_scope_collection, + row_target_taxonomic_scope=row_target_taxonomic_scope, + uri_target_taxonomic_scope_attribute=uri_target_taxonomic_scope_attribute, + uri_site_visit_activity=uri_site_visit_activity, + dataset=dataset, + graph=graph, + ) + + # Add samplingEffort Attribute, Value and Collection + self.add_sampling_effort_attribute( + uri=uri_sampling_effort_attribute, + 
row_sampling_effort=row_sampling_effort, + uri_sampling_effort_value=uri_sampling_effort_value, + dataset=dataset, + graph=graph, + ) + self.add_sampling_effort_value( + uri=uri_sampling_effort_value, + row_sampling_effort_value=row_sampling_effort_value, + row_sampling_effort_unit=row_sampling_effort_unit, + dataset=dataset, + graph=graph, + base_iri=base_iri, + ) + self.add_sampling_effort_collection( + uri=uri_sampling_effort_collection, + row_sampling_effort=row_sampling_effort, + uri_sampling_effort_attribute=uri_sampling_effort_attribute, + uri_site_visit_activity=uri_site_visit_activity, + dataset=dataset, + graph=graph, + ) + + # Add extra fields + self.add_extra_fields_json( + subject_uri=uri_site_visit_activity, + row=row, + graph=graph, + extra_schema=extra_schema, + ) + + def add_site_visit_activity( + self, + *, + uri: rdflib.URIRef, + row_site_visit_id: str, + uri_survey: rdflib.URIRef, + uri_site: rdflib.URIRef, + uri_site_visit_plan: rdflib.URIRef, + visit_org_agents: list[Agent], + visit_observer_agents: list[Agent], + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + # Add type + graph.add((uri, a, utils.namespaces.TERN.SiteVisit)) + # Add dataset link + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # Add survey link + graph.add((uri, rdflib.SDO.isPartOf, uri_survey)) + # Add site link + graph.add((uri, utils.namespaces.TERN.hasSite, uri_site)) + + # Add identifier + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(row_site_visit_id))) + + # Add temporal entity for start/end time + temporal_entity = rdflib.BNode() + graph.add((uri, rdflib.TIME.hasTime, temporal_entity)) + graph.add((temporal_entity, a, rdflib.TIME.TemporalEntity)) + row_site_visit_start: models.temporal.Timestamp = row["siteVisitStart"] + row_site_visit_end: models.temporal.Timestamp | None = row["siteVisitEnd"] + start_instant = rdflib.BNode() + graph.add((start_instant, a, rdflib.TIME.Instant)) + graph.add((start_instant, 
row_site_visit_start.rdf_in_xsd, row_site_visit_start.to_rdf_literal())) + graph.add((temporal_entity, rdflib.TIME.hasBeginning, start_instant)) + if row_site_visit_end: + end_instant = rdflib.BNode() + graph.add((end_instant, a, rdflib.TIME.Instant)) + graph.add((end_instant, row_site_visit_end.rdf_in_xsd, row_site_visit_end.to_rdf_literal())) + graph.add((temporal_entity, rdflib.TIME.hasEnd, end_instant)) + + # Add link(s) to visitOrgs + for visit_org_agent in visit_org_agents: + graph.add((uri, rdflib.PROV.wasAssociatedWith, visit_org_agent.uri)) + + # Add link(s) to visitObservers + for visit_observer_agent in visit_observer_agents: + graph.add((uri, rdflib.PROV.wasAssociatedWith, visit_observer_agent.uri)) + + # Add condition + row_condition: str | None = row["condition"] + if row_condition: + graph.add((uri, utils.namespaces.TERN.siteDescription, rdflib.Literal(row_condition))) + + # Add link to Site Visit Plan + graph.add((uri, rdflib.PROV.hadPlan, uri_site_visit_plan)) + + def add_survey( + self, + uri: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds the basics of the Survey node to the graph. + + The other properties for the node come from the survey metadata. + + Args: + uri: The URI for the Survey node + dataset: The dataset URI + graph: The graph to update + """ + # Add type + graph.add((uri, a, utils.namespaces.TERN.Survey)) + # Add dataset link + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + def add_site( + self, + *, + uri: rdflib.URIRef, + uri_site_id_datatype: rdflib.URIRef | None, + dataset: rdflib.URIRef, + row: frictionless.Row, + graph: rdflib.Graph, + ) -> None: + """Adds site to the graph. + + Args: + uri: Subject of the node. + uri_site_id_datatype: Datatype of the site + id source. + row: Raw data for row. + graph: Graph to be modified. 
+ """ + # Add class + graph.add((uri, a, utils.namespaces.TERN.Site)) + + # Add siteID literal + dt = uri_site_id_datatype or rdflib.XSD.string + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(row["siteID"], datatype=dt))) + + # Add to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + def add_site_id_datatype( + self, + *, + uri: rdflib.URIRef | None, + row_site_id_source: str | None, + uri_site_id_datatype_attribution: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds site id datatype to the graph. + + Args: + uri: Subject of the node. + row_site_id_source: The siteIDSource value from the row. + uri_site_id_datatype_attribution: The datatype attribution node. + graph: Graph to be modified. + """ + # Check subject was provided + if uri is None: + return + # Add type + graph.add((uri, a, rdflib.RDFS.Datatype)) + # Add definition + graph.add((uri, rdflib.SKOS.definition, rdflib.Literal("An identifier for the site"))) + # Add label + if row_site_id_source: + graph.add((uri, rdflib.SKOS.prefLabel, rdflib.Literal(f"{row_site_id_source} Site ID"))) + # Add attribution link + if uri_site_id_datatype_attribution: + graph.add((uri, rdflib.PROV.qualifiedAttribution, uri_site_id_datatype_attribution)) + + def add_site_id_datatype_attribution( + self, + *, + uri: rdflib.URIRef | None, + uri_site_id_datatype_agent: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds site id datatype attribution to the graph. + + Args: + uri: Suject of the node + uri_site_id_datatype_agent: The datatype agent node. + graph: The graph to be modified. 
+ """ + # Check subject provided + if uri is None: + return + # Add type + graph.add((uri, a, rdflib.PROV.Attribution)) + # Add role + graph.add((uri, rdflib.PROV.hadRole, DATA_ROLE_RESOURCE_PROVIDER)) + # Add agent link + if uri_site_id_datatype_agent: + graph.add((uri, rdflib.PROV.agent, uri_site_id_datatype_agent)) + + def add_site_id_datatype_agent( + self, + *, + uri: rdflib.URIRef | None, + row_site_id_source: str | None, + graph: rdflib.Graph, + ) -> None: + """Adds the site id datatype agent to the graph. + + Args: + uri: Subject of the node. + row_site_id_source: The siteIDSource value from the row. + graph: Graph to be modified. + """ + # Check subject provided + if uri is None: + return + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + # Add name + if row_site_id_source: + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row_site_id_source))) + + def add_visit_org_agent( + self, + *, + uri: rdflib.URIRef, + row_visit_org: str, + graph: rdflib.Graph, + ) -> None: + """Add a visit Org Agent node to the graph. + + Args: + uri: The URI for this agent + row_visit_org: One of the values from the visitOrgs field + graph: The graph to be modified. + """ + # Add subject types + graph.add((uri, a, rdflib.PROV.Agent)) + graph.add((uri, a, rdflib.PROV.Organization)) + # Add name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row_visit_org))) + + def add_visit_observer_agent( + self, + *, + uri: rdflib.URIRef, + row_visit_observer: str, + graph: rdflib.Graph, + ) -> None: + """Add a visit Observer Agent node to the graph. + + Args: + uri: The URI for this agent + row_visit_observer: One of the values from the visitObservers field + graph: The graph to be modified. 
+ """ + # Add subject types + graph.add((uri, a, rdflib.PROV.Agent)) + graph.add((uri, a, rdflib.PROV.Person)) + # Add name + graph.add((uri, rdflib.SDO.name, rdflib.Literal(row_visit_observer))) + + def add_site_visit_plan( + self, + *, + uri: rdflib.URIRef, + row: frictionless.Row, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Add a site visit prov:Plan node to the graph. + + Args: + uri: The URI for the site visit plan + row: Raw row from the template. + dataset: Dataset raw data belongs to. + graph: The graph to be modified. + base_iri: Namespace used to construct IRIs + """ + # Add subject type + graph.add((uri, a, rdflib.PROV.Plan)) + + # add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add description + row_protocol_description: str | None = row["protocolDescription"] + if row_protocol_description: + graph.add((uri, rdflib.SDO.description, rdflib.Literal(row_protocol_description))) + + # Add used procedure + row_protocol_name: str | None = row["protocolName"] + if row_protocol_name: + # Retrieve vocab for field + vocab = self.fields()["protocolName"].get_flexible_vocab() + # get or create term IRI + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row_protocol_name) + # Add link to term + graph.add((uri, rdflib.SOSA.usedProcedure, term)) + + def add_target_taxonomic_scope_attribute( + self, + *, + uri: rdflib.URIRef | None, + row_target_taxonomic_scope: str | None, + uri_target_taxonomic_scope_value: rdflib.URIRef | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add the target taxonomic scope Attribute node. + + Args: + uri: Subject of the node. + row_target_taxonomic_scope: Raw data in the targetTaxonomicScope field. + uri_target_taxonomic_scope_value: The target taxonomic scope Value node. + dataset (rdflib.URIRef): Dataset raw data belongs. + graph (rdflib.Graph): Graph to be modified. 
+ """ + # Check subject is provided + if uri is None: + return + + # Add type + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + + # Add dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add attribute concept + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_TARGET_TAXONOMIC_SCOPE)) + + # Add values + if row_target_taxonomic_scope: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(row_target_taxonomic_scope))) + if uri_target_taxonomic_scope_value: + graph.add((uri, utils.namespaces.TERN.hasValue, uri_target_taxonomic_scope_value)) + + def add_target_taxonomic_scope_value( + self, + *, + uri: rdflib.URIRef | None, + row_target_taxonomic_scope: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds the target taxonomic scope Attribute Value node. + + Args: + uri: Subject of the node. + row_target_taxonomic_scope: Raw data in the targetTaxonomicScope field. + dataset: Dataset raw data belongs. + graph: Graph to be modified. + base_iri: Namespace used to construct IRIs + """ + # check subject is provided + if uri is None: + return + + # Add types + graph.add((uri, a, utils.namespaces.TERN.IRI)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + if row_target_taxonomic_scope: + # Retrieve vocab for field + vocab = self.fields()["targetTaxonomicScope"].get_flexible_vocab() + + # Add value + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row_target_taxonomic_scope) + graph.add((uri, rdflib.RDF.value, term)) + + def add_target_taxonomic_scope_collection( + self, + *, + uri: rdflib.URIRef | None, + row_target_taxonomic_scope: str | None, + uri_target_taxonomic_scope_attribute: rdflib.URIRef | None, + uri_site_visit_activity: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a target taxonomic scope Collection to the graph + + Args: + uri: The uri for the Collection. 
+ row_target_taxonomic_scope: targetTaxonomicScope value from template. + uri_target_taxonomic_scope_attribute: The uri for the attribute node. + uri_site_visit_activity: The Site Visit node that should be a member of the collection. + dataset: The uri for the dateset node. + graph: The graph. + """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if row_target_taxonomic_scope: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Site Visit Collection - Target Taxonomic Scope - {row_target_taxonomic_scope}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the SiteVisit node + graph.add((uri, rdflib.SDO.member, uri_site_visit_activity)) + # Add link to attribute + if uri_target_taxonomic_scope_attribute is not None: + graph.add((uri, utils.namespaces.TERN.hasAttribute, uri_target_taxonomic_scope_attribute)) + + def add_sampling_effort_attribute( + self, + *, + uri: rdflib.URIRef | None, + row_sampling_effort: str | None, + uri_sampling_effort_value: rdflib.URIRef | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Adds sampling effort Attribute node. + + Args: + uri: Subject of the node. + row_sampling_effort: Combination of samplingEffortValue and samplingEffortUnit fields + uri_sampling_effort_value: URI of the Attribute Value node. + dataset (rdflib.URIRef): Dataset raw data belongs. + graph (rdflib.Graph): Graph to be modified. 
+ """ + # Check that subject is provided + if uri is None: + return + + # Add type + graph.add((uri, a, utils.namespaces.TERN.Attribute)) + + # Add dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + + # Add concept + graph.add((uri, utils.namespaces.TERN.attribute, CONCEPT_SAMPLING_EFFORT)) + + # Add values + if row_sampling_effort: + graph.add((uri, utils.namespaces.TERN.hasSimpleValue, rdflib.Literal(row_sampling_effort))) + if uri_sampling_effort_value: + graph.add((uri, utils.namespaces.TERN.hasValue, uri_sampling_effort_value)) + + def add_sampling_effort_value( + self, + *, + uri: rdflib.URIRef | None, + row_sampling_effort_value: str | None, + row_sampling_effort_unit: str | None, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + base_iri: rdflib.Namespace, + ) -> None: + """Adds sampling effort Attribute Value node. + + Args: + uri: Subject of the node. + row_sampling_effort_value: Value from the samplingEffortValue field. + row_sampling_effort_unit: Value from the samplingEffortUnit field. + dataset (rdflib.URIRef): URI of the dataset this belongs to. + graph (rdflib.Graph): Graph to be modified. 
+ base_iri (rdflib.Namespace): Namespace used to construct IRIs + """ + if uri is None: + return + + # Add types + graph.add((uri, a, utils.namespaces.TERN.Float)) + graph.add((uri, a, utils.namespaces.TERN.Value)) + + # Add value + if row_sampling_effort_value: + graph.add((uri, rdflib.RDF.value, rdflib.Literal(row_sampling_effort_value, datatype=rdflib.XSD.float))) + + # Add Unit + if row_sampling_effort_unit: + # Retrieve vocab for field + vocab = self.fields()["samplingEffortUnit"].get_flexible_vocab() + # Add value + term = vocab(graph=graph, source=dataset, base_iri=base_iri).get(row_sampling_effort_unit) + graph.add((uri, utils.namespaces.TERN.unit, term)) + + def add_sampling_effort_collection( + self, + *, + uri: rdflib.URIRef | None, + row_sampling_effort: str | None, + uri_sampling_effort_attribute: rdflib.URIRef | None, + uri_site_visit_activity: rdflib.URIRef, + dataset: rdflib.URIRef, + graph: rdflib.Graph, + ) -> None: + """Add a sampling effort Collection to the graph + + Args: + uri: The uri for the Collection. + row_sampling_effort: Combined samplingEffort value from template. + uri_sampling_effort_attribute: The uri for the attribute node. + uri_site_visit_activity: The Site Visit node that should be a member of the collection. + dataset: The uri for the dateset node. + graph: The graph. 
+ """ + if uri is None: + return + + # Add type + graph.add((uri, a, rdflib.SDO.Collection)) + # Add identifier + if row_sampling_effort: + graph.add( + ( + uri, + rdflib.SDO.name, + rdflib.Literal(f"Site Visit Collection - Sampling Effort - {row_sampling_effort}"), + ) + ) + # Add link to dataset + graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # add link to the SiteVisit node + graph.add((uri, rdflib.SDO.member, uri_site_visit_activity)) + # Add link to attribute + if uri_sampling_effort_attribute is not None: + graph.add((uri, utils.namespaces.TERN.hasAttribute, uri_sampling_effort_attribute)) + + +# Register Mapper +base.mapper.register_mapper(SurveySiteVisitMapper) diff --git a/abis_mapping/templates/survey_site_visit_data_v3/metadata.json b/abis_mapping/templates/survey_site_visit_data_v3/metadata.json new file mode 100644 index 00000000..25851ede --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/metadata.json @@ -0,0 +1,13 @@ +{ + "name": "survey_site_visit_data", + "label": "Systematic Survey Site Visit Data Template", + "version": "3.0.0", + "description": "A template for systematic survey site visit data", + "biodiversity_type": "Systematic Survey Site Visit Data", + "spatial_type": "Point, line, polygon", + "file_type": "CSV", + "sampling_type": "systematic survey", + "template_url": "https://raw.githubusercontent.com/gaiaresources/abis-mapping/main/abis_mapping/templates/survey_site_visit_data_v3/survey_site_visit_data.csv", + "schema_url": "https://raw.githubusercontent.com/gaiaresources/abis-mapping/main/abis_mapping/templates/survey_site_visit_data_v3/schema.json", + "template_lifecycle_status": "beta" +} diff --git a/abis_mapping/templates/survey_site_visit_data_v3/schema.json b/abis_mapping/templates/survey_site_visit_data_v3/schema.json new file mode 100644 index 00000000..c22f20f7 --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/schema.json @@ -0,0 +1,168 @@ +{ + "fields": [ + { + "name": "surveyID", + 
"title": "SurveyID", + "description": "The identifier of the Survey that the Site is related to in this dataset.", + "example": "AR220-01", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteID", + "title": "Site ID", + "description": "A unique within dataset string identifier for the site. Valid values include strings that are used specifically for this survey or URIs from BDR Sites that have been established in previous surveys.", + "example": "P1", + "type": "string", + "format": "default", + "constraints": { + "required": true + } + }, + { + "name": "siteIDSource", + "title": "Site ID Source", + "description": "The organisation that assigned the SiteID to this Site", + "example": "TERN", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "siteVisitID", + "title": "Site Visit ID", + "description": "The unique key assigned to a visit. A visit is a time distinct assessment conducted within a survey at a designated site.", + "example": "CPXEI0000001", + "type": "string", + "format": "default", + "constraints": { + "required": true, + "unique": true + } + }, + { + "name": "siteVisitStart", + "title": "Site Visit Start", + "description": "The temporal start of when the Site was being used to collect data for the survey. Expected values include date, dateTime, dateTimeStamp.", + "example": "2016-02-28", + "type": "timestamp", + "format": "default", + "constraints": { + "required": true + } + }, + { + "name": "siteVisitEnd", + "title": "Site Visit End", + "description": "The temporal end of when the Site was being used to collect data for the survey. 
Expected values include date, dateTime, dateTimeStamp.", + "example": "2016-02-28", + "type": "timestamp", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "visitOrgs", + "title": "Visit Orgs", + "description": "The names of the organisations responsible for recording the original Occurrence.", + "example": "NSW Dept of Planning, Industry and Environment.", + "type": "list", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "visitObservers", + "title": "Visit Observers", + "description": "A list (concatenated and separated using |) of names of people, groups, or organisations responsible for recording the original Occurrence.", + "example": "Oliver P. Pearson | Anita K. Pearson", + "type": "list", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "condition", + "title": "Condition", + "description": "The state of a patch of vegetation at the time of sampling relative to some specified standard or benchmark (where available).", + "example": "Burnt", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "targetTaxonomicScope", + "title": "Target Taxonomic Scope", + "description": "The taxonomic group targeted for sampling during the Site Visit", + "example": "Coleoptera", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "TARGET_TAXONOMIC_SCOPE" + ] + }, + { + "name": "protocolName", + "title": "Protocol Name", + "description": "Categorical descriptive name for the method used during the Site Visit.", + "example": "HARD TRAP", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "VISIT_PROTOCOL_NAME" + ] + }, + { + "name": "protocolDescription", + "title": "Protocol Description", + "description": "A detailed description of the method used during the Site Visit. 
The description may include deviations from a protocol referred to in eco:protocolReferences. Recommended good practice is to provide information about instruments used, calibration, etc.", + "example": "Three conventional harp traps (3.2m ht x 2.2m w) were established in flight path zones for a period of 4 hrs at dawn and dusk for a total of 10 trap nights. Traps were visited on an hourly basis during each deployment period and the trap catch recorded for species, size, weight, sex, age and maternal status.", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "samplingEffortValue", + "title": "Sample Effort", + "description": "Similar to eco:samplingEffortValue. The total sampling effort value. A samplingEffortValue must have a corresponding samplingEffortUnit", + "example": "20 x 12", + "type": "string", + "format": "default", + "constraints": { + "required": false + } + }, + { + "name": "samplingEffortUnit", + "title": "Sampling Effort Units", + "description": "Similar to eco:samplingEffortUnit. 
The units associated with samplingEffortValue.", + "example": "trapDays", + "type": "string", + "format": "default", + "constraints": { + "required": false + }, + "vocabularies": [ + "SAMPLING_EFFORT_UNIT" + ] + } + ] +} diff --git a/abis_mapping/templates/survey_site_visit_data_v3/survey_site_visit_data.csv b/abis_mapping/templates/survey_site_visit_data_v3/survey_site_visit_data.csv new file mode 100644 index 00000000..e24bff04 --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/survey_site_visit_data.csv @@ -0,0 +1 @@ +surveyID,siteID,siteIDSource,siteVisitID,siteVisitStart,siteVisitEnd,visitOrgs,visitObservers,condition,targetTaxonomicScope,protocolName,protocolDescription,samplingEffortValue,samplingEffortUnit diff --git a/abis_mapping/templates/survey_site_visit_data_v3/templates/instructions.md b/abis_mapping/templates/survey_site_visit_data_v3/templates/instructions.md new file mode 100644 index 00000000..aa57613e --- /dev/null +++ b/abis_mapping/templates/survey_site_visit_data_v3/templates/instructions.md @@ -0,0 +1,158 @@ +{% extends "BASE_TEMPLATE base.md" %} +{% block body %} +# SYSTEMATIC SURVEY SITE VISIT DATA TEMPLATE INSTRUCTIONS + +## Intended Usage +This Systematic Survey Site Visit Data template should be used to record data related +to the visit made to the Site area during a systematic survey. + +This Systematic Survey Site Visit template **must be used in combination** with the +Systematic Survey Site Data template. + +Templates have been provided to facilitate integration of data into the Biodiversity Data +Repository (BDR) database. Not all types of data have been catered for in the available +templates at this stage - if you are unable to find a suitable template, please +contact to make us aware of your data needs. 
+ +## Data Validation Requirements: +For data validation, you will need your data file to: +- be the correct **file format**, +- have **fields that match the template downloaded** (do not remove, or change the order of fields), +- have extant values for **mandatory fields** (see Table 1), and +- comply with all **data value constraints**, +- align with existing controlled [vocabularies](#appendix-i-vocabulary-list) wherever possible, but +new terms may be submitted for consideration and will not cause a validation error. + +Additional fields may be added **after the templated fields** (noting that the data type +is not assumed and values will be encoded as strings). + +### FILE FORMAT +- The systematic survey site visit data template is a [UTF-8](#appendix-iii-utf-8) encoded csv (not Microsoft + Excel Spreadsheets). Be sure to save this file with your data as a .csv (UTF-8) as follows, + otherwise it will not pass the csv validation step upon upload. +
`[MS Excel: Save As > More options > Tools > Web options > Save this document as > + Unicode (UTF-8)]`
+ otherwise it will not pass the csv validation step upon upload. +- **Do not include empty rows**. + +### FILE NAME + +When making a manual submission to the Biodiversity Data Repository, +the file name must include the version number +of this biodiversity data template (`v{{ metadata.version }}`). +The following format is an example of a valid file name: + +`data_description-v{{ metadata.version }}-additional_description.csv` + +where: + +* `data_description`: A short description of the data (e.g. `survey_site_visits`, `test_data`). +* `v{{ metadata.version }}`: The version number of this template. +* `additional_description`: (Optional) Additional description of the data, if needed (e.g. `test_data`). +* `.csv`: Ensure the file name ends with `.csv`. + +For example, `survey_site_visits-v{{ metadata.version }}-test_data.csv` or `test_data-v{{ metadata.version }}.csv` + +### FILE SIZE +MS Excel imposes a limit of 1,048,576 rows on a spreadsheet, limiting a CSV file to the +header row followed by 1,048,575 occurrences. Furthermore, MS Excel has a 32,767 character +limit on individual cells in a spreadsheet. These limits may be overcome by using or +editing CSV files with other software. + +Larger datasets may be more readily ingested using the API interface. Please contact + to make us aware of your data needs. + +## TEMPLATE FIELDS +The template contains the field names in the top row. Table 1 will assist you in transferring +your data to the template indicating: + +- **Field name** in the template (and an external link to the [Data standard](https://linkeddata.tern.org.au/) + for that field where relevant); +- **Description** of the field; +- **Required** i.e.
whether the field is **mandatory, +conditionally mandatory, or optional**; +- **Format** (datatype) required for the data values for example text (string), number + (integer, float), or date; +- **Example** of an entry or entries for that field; and +- **[Vocabulary links](#appendix-i-vocabulary-list)** within this document (for example pick list values) where + relevant. The fields that have suggested values options for the fields in Table 1 are + listed in Table 2 in alphabetical order of the field name. + +### ADDITIONAL FIELDS +Data that does not match the existing template fields may be added as additional columns in +the CSV files after the templated fields. +For example, `instrumentType`, `instrumentIdentifier`, `weatherConditions`. + +Table 1: Systematic Survey Site Visit data template fields with descriptions, conditions, datatype format, and examples. + +{{tables.fields}} + +## CHANGELOG + +No changes from Systematic Survey Site Visit Data Template v2.0.0 + +## APPENDICES +### APPENDIX-I: Vocabulary List +Data validation does not require adherence to the vocabularies for the various vocabularied fields. +These vocabularies are merely provided as a means of assistance in developing consistent language +within the database. New terms may be added to more appropriately describe your data that goes +beyond the current list. + +Table 2: Suggested values for controlled vocabulary fields in the template. Each term has a preferred label with a definition to aid understanding +of its meaning. For some terms, alternative +labels with similar semantics are provided. + +{{tables.vocabularies}} + +### APPENDIX-II: Timestamp +Following date and date-time formats are acceptable within the timestamp: + +| TYPE | FORMAT | +| --- |-------------------------------------------------------------------------------------------------------------------------------------| +| **xsd:dateTimeStamp with timezone** | yyyy-mm-ddThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00) OR
yyyy-mm-ddThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00) OR
yyyy-mm-ddThh:mmTZD (eg 1997-07-16T19:20+01:00)| +| **xsd:dateTime** | yyyy-mm-ddThh:mm:ss.s (eg 1997-07-16T19:20:30.45) OR
yyyy-mm-ddThh:mm:ss (eg 1997-07-16T19:20:30) OR
yyyy-mm-ddThh:mm (eg 1997-07-16T19:20) | +| **xsd:Date** | dd/mm/yyyy OR
d/m/yyyy OR
yyyy-mm-dd OR
yyyy-m-d | +| **xsd:gYearMonth** | mm/yyyy OR
m/yyyy OR
yyyy-mm | +| **xsd:gYear** | yyyy | + +Where:
+  `yyyy`: four-digit year
+  `mm`: two-digit month (01=January, etc.)
+  `dd`: two-digit day of month (01 through 31)
+  `hh`: two digits of hour (00 through 23) (am/pm NOT allowed)
+  `mm`: two digits of minute (00 through 59)
+  `ss`: two digits of second (00 through 59)
+  `s`: one or more digits representing a decimal fraction of a second +  `TZD`: time zone designator (Z or +hh:mm or -hh:mm) + + +### APPENDIX-III: UTF-8 +UTF-8 encoding is considered a best practice for handling character encoding, especially in +the context of web development, data exchange, and modern software systems. UTF-8 +(Unicode Transformation Format, 8-bit) is a variable-width character encoding capable of +encoding all possible characters (code points) in Unicode.
+Here are some reasons why UTF-8 is recommended: +- **Universal Character Support:** UTF-8 can represent almost all characters from all writing + systems in use today. This includes characters from various languages, mathematical symbols, + and other special characters. +- **Backward Compatibility:** UTF-8 is backward compatible with ASCII (American + Standard Code for Information Interchange). The first 128 characters in UTF-8 are + identical to ASCII, making it easy to work with systems that use ASCII. +- **Efficiency:** UTF-8 is space-efficient for Latin-script characters (common in English + and many other languages). It uses one byte for ASCII characters and up to four + bytes for other characters. This variable-length encoding minimises storage and + bandwidth requirements. +- **Web Standards:** UTF-8 is the dominant character encoding for web content. It is + widely supported by browsers, servers, and web-related technologies. +- **Globalisation:** As software applications become more globalised, supporting a wide + range of languages and scripts becomes crucial. UTF-8 is well-suited for + internationalisation and multilingual support. +- **Compatibility with Modern Systems:** UTF-8 is the default encoding for many + programming languages, databases, and operating systems. Choosing UTF-8 helps + ensure compatibility across different platforms and technologies. + +When working with text data, UTF-8 encoding is recommended to avoid issues related to character +representation and ensure that a diverse set of characters and languages is supported. + +For assistance, please contact: +{% endblock %} diff --git a/docs/contexts/__init__.py b/docs/contexts/__init__.py index 1f147d63..3c6f890a 100644 --- a/docs/contexts/__init__.py +++ b/docs/contexts/__init__.py @@ -4,6 +4,10 @@ from . import base from . import incidental_occurrence_data_v3 from . import survey_occurrence_data_v2 +from . import survey_occurrence_data_v3 from . import survey_metadata_v2 +from . 
import survey_metadata_v3 from . import survey_site_data_v2 +from . import survey_site_data_v3 from . import survey_site_visit_data_v2 +from . import survey_site_visit_data_v3 diff --git a/docs/contexts/survey_metadata_v3.py b/docs/contexts/survey_metadata_v3.py new file mode 100644 index 00000000..aa92b1b2 --- /dev/null +++ b/docs/contexts/survey_metadata_v3.py @@ -0,0 +1,29 @@ +"""Declares and registers the survey metadata v3 instruction rendering context.""" + +# Local +from abis_mapping import base +from abis_mapping import vocabs +import abis_mapping.templates.survey_metadata_v3.mapping +from docs import contexts +from docs import tables + + +# Constants +mapper = abis_mapping.templates.survey_metadata_v3.mapping.SurveyMetadataMapper +mapper_id = mapper().template_id + +if mapper_id in base.mapper.registered_ids(): + # Create context + _ctx = { + "tables": { + "fields": tables.fields.FieldTabler(template_id=mapper_id, format="markdown").generate_table(), + "vocabularies": tables.vocabs.VocabTabler(template_id=mapper_id, format="markdown").generate_table(), + }, + "values": { + "geodetic_datum_count": len(vocabs.geodetic_datum.GeodeticDatum.terms), + }, + "metadata": mapper.metadata(), + } + + # Register + contexts.base.register(mapper_id, _ctx) diff --git a/docs/contexts/survey_occurrence_data_v3.py b/docs/contexts/survey_occurrence_data_v3.py new file mode 100644 index 00000000..ebf6110d --- /dev/null +++ b/docs/contexts/survey_occurrence_data_v3.py @@ -0,0 +1,40 @@ +"""Declares and registers the survey occurrence data v3 instruction rendering context.""" + +# Local +from abis_mapping import base +from abis_mapping import vocabs +from docs import contexts +from docs import tables + +# Typing +from typing import Any + +# Constants +mapper_id = "survey_occurrence_data-v3.0.0.csv" + + +# Create context +def _ctx() -> dict[str, Any]: + """Returns the context for rendering the instructions of the mapper.""" + # Retrieve mapper + mapper = 
base.mapper.get_mapper(mapper_id) + + # Return + return { + "tables": { + "fields": tables.fields.OccurrenceFieldTabler(template_id=mapper_id, format="markdown").generate_table(), + "vocabularies": tables.vocabs.VocabTabler(template_id=mapper_id, format="markdown").generate_table(), + "threat_status": tables.threat_status.ThreatStatusTabler( + template_id=mapper_id, format="markdown" + ).generate_table(), + }, + "values": { + "geodetic_datum_count": len(vocabs.geodetic_datum.GeodeticDatum.terms), + }, + "metadata": mapper.metadata() if mapper is not None else None, + } + + +# Register once the mapper is also registered +if mapper_id in base.mapper.registered_ids(): + contexts.base.register(mapper_id, _ctx()) diff --git a/docs/contexts/survey_site_data_v3.py b/docs/contexts/survey_site_data_v3.py new file mode 100644 index 00000000..2b8fb392 --- /dev/null +++ b/docs/contexts/survey_site_data_v3.py @@ -0,0 +1,29 @@ +"""Declares and registers the survey site data v3 instruction rendering context.""" + +# Local +from abis_mapping import base +from abis_mapping import vocabs +import abis_mapping.templates.survey_site_data_v3.mapping +from docs import contexts +from docs import tables + + +# Constants +mapper = abis_mapping.templates.survey_site_data_v3.mapping.SurveySiteMapper +mapper_id = mapper().template_id + +if mapper_id in base.mapper.registered_ids(): + # Declare context + _ctx = { + "tables": { + "vocabularies": tables.vocabs.VocabTabler(template_id=mapper_id, format="markdown").generate_table(), + "fields": tables.fields.FieldTabler(template_id=mapper_id, format="markdown").generate_table(), + }, + "values": { + "geodetic_datum_count": len(vocabs.geodetic_datum.GeodeticDatum.terms), + }, + "metadata": mapper.metadata(), + } + + # Register + contexts.base.register(mapper_id, _ctx) diff --git a/docs/contexts/survey_site_visit_data_v3.py b/docs/contexts/survey_site_visit_data_v3.py new file mode 100644 index 00000000..b50ef85a --- /dev/null +++ 
b/docs/contexts/survey_site_visit_data_v3.py @@ -0,0 +1,26 @@ +"""Declares and registers the survey site visit data v3 instruction rendering context.""" + +# Local +from abis_mapping import base +import abis_mapping.templates.survey_site_visit_data_v3.mapping +from docs import contexts +from docs import tables + + +# Constants +mapper = abis_mapping.templates.survey_site_visit_data_v3.mapping.SurveySiteVisitMapper +mapper_id = mapper().template_id + +if mapper_id in base.mapper.registered_ids(): + # Declare context + _ctx = { + "tables": { + "vocabularies": tables.vocabs.VocabTabler(template_id=mapper_id, format="markdown").generate_table(), + "fields": tables.fields.FieldTabler(template_id=mapper_id, format="markdown").generate_table(), + }, + "values": {}, + "metadata": mapper.metadata(), + } + + # Register + contexts.base.register(mapper_id, _ctx) diff --git a/scripts/generate_instructions.sh b/scripts/generate_instructions.sh index 07c503b8..e67e1c31 100755 --- a/scripts/generate_instructions.sh +++ b/scripts/generate_instructions.sh @@ -10,3 +10,9 @@ python docs/instructions.py -o docs/pages/survey_metadata-v2.0.0.csv.md survey_m python docs/instructions.py -o docs/pages/survey_occurrence_data-v2.0.0.csv.md survey_occurrence_data-v2.0.0.csv python docs/instructions.py -o docs/pages/survey_site_data-v2.0.0.csv.md survey_site_data-v2.0.0.csv python docs/instructions.py -o docs/pages/survey_site_visit_data-v2.0.0.csv.md survey_site_visit_data-v2.0.0.csv + +# survey v3 +python docs/instructions.py -o docs/pages/survey_metadata-v3.0.0.csv.md survey_metadata-v3.0.0.csv +python docs/instructions.py -o docs/pages/survey_occurrence_data-v3.0.0.csv.md survey_occurrence_data-v3.0.0.csv +python docs/instructions.py -o docs/pages/survey_site_data-v3.0.0.csv.md survey_site_data-v3.0.0.csv +python docs/instructions.py -o docs/pages/survey_site_visit_data-v3.0.0.csv.md survey_site_visit_data-v3.0.0.csv diff --git a/tests/templates/conftest.py b/tests/templates/conftest.py 
index 18319255..94093341 100644 --- a/tests/templates/conftest.py +++ b/tests/templates/conftest.py @@ -76,86 +76,6 @@ class TemplateTestParameters: TEST_CASES_ALL: list[TemplateTestParameters] = [ - # Survey templates v1 - TemplateTestParameters( - template_id="survey_occurrence_data-v1.0.0.csv", - empty_template=pathlib.Path("abis_mapping/templates/survey_occurrence_data/survey_occurrence_data.csv"), - mapping_cases=[ - MappingParameters( - data=pathlib.Path( - ( - "abis_mapping/templates/survey_occurrence_data/examples" - "/margaret_river_flora/margaret_river_flora.csv" - ) - ), - expected=pathlib.Path( - ( - "abis_mapping/templates/survey_occurrence_data/examples" - "/margaret_river_flora/margaret_river_flora.ttl" - ) - ), - ), - MappingParameters( - scenario_name="organism_qty", - should_validate=True, - data=pathlib.Path( - "abis_mapping/templates/survey_occurrence_data/examples/organism_qty.csv", - ), - expected=pathlib.Path( - "abis_mapping/templates/survey_occurrence_data/examples/organism_qty.ttl", - ), - ), - ], - metadata_sampling_type="systematic survey", - allows_extra_cols=True, - chunking_parameters=[ - ChunkingParameters( - data=pathlib.Path( - ( - "abis_mapping/templates/survey_occurrence_data/examples/" - "margaret_river_flora/margaret_river_flora.csv" - ) - ), - chunk_size=7, - yield_count=3, - ), - ], - ), - TemplateTestParameters( - template_id="survey_site_data-v1.0.0.csv", - empty_template=pathlib.Path( - "abis_mapping/templates/survey_site_data/survey_site_data.csv", - ), - mapping_cases=[ - MappingParameters( - data=pathlib.Path("abis_mapping/templates/survey_site_data/examples/minimal.csv"), - expected=pathlib.Path("abis_mapping/templates/survey_site_data/examples/minimal.ttl"), - ), - ], - metadata_sampling_type="systematic survey", - allows_extra_cols=True, - ), - TemplateTestParameters( - template_id="survey_metadata-v1.0.0.csv", - empty_template=pathlib.Path("abis_mapping/templates/survey_metadata/survey_metadata.csv"), - 
mapping_cases=[ - MappingParameters( - data=pathlib.Path("abis_mapping/templates/survey_metadata/examples/minimal.csv"), - expected=pathlib.Path("abis_mapping/templates/survey_metadata/examples/minimal.ttl"), - ), - MappingParameters( - scenario_name="invalid_chrono_order", - should_validate=False, - expected_error_codes={"row-constraint"}, - data=pathlib.Path( - "abis_mapping/templates/survey_metadata/examples/minimal_error_chronological_order.csv" - ), - expected=None, - ), - ], - metadata_sampling_type="systematic survey", - allows_extra_cols=True, - ), # Survey templates v2 TemplateTestParameters( template_id="survey_occurrence_data-v2.0.0.csv", @@ -304,35 +224,82 @@ class TemplateTestParameters: metadata_sampling_type="systematic survey", allows_extra_cols=True, ), - # Incidental templates + # Survey templates v3 TemplateTestParameters( - template_id="incidental_occurrence_data-v2.0.0.csv", - empty_template=pathlib.Path( - "abis_mapping/templates/incidental_occurrence_data_v2/incidental_occurrence_data.csv" - ), + template_id="survey_metadata-v3.0.0.csv", + empty_template=pathlib.Path("abis_mapping/templates/survey_metadata_v3/survey_metadata.csv"), + mapping_cases=[ + MappingParameters( + data=pathlib.Path("abis_mapping/templates/survey_metadata_v3/examples/minimal.csv"), + expected=pathlib.Path("abis_mapping/templates/survey_metadata_v3/examples/minimal.ttl"), + ), + MappingParameters( + scenario_name="invalid_chrono_order", + should_validate=False, + expected_error_codes={"row-constraint"}, + data=pathlib.Path( + "abis_mapping/templates/survey_metadata_v3/examples/minimal_error_chronological_order.csv" + ), + expected=None, + ), + MappingParameters( + scenario_name="too_many_rows", + should_validate=False, + expected_error_codes={"table-dimensions"}, + data=pathlib.Path("abis_mapping/templates/survey_metadata_v3/examples/minimal_error_too_many_rows.csv"), + expected=None, + ), + MappingParameters( + scenario_name="mutually-inclusive-field-missing", + 
should_validate=False, + expected_error_codes={"row-constraint"}, + data=pathlib.Path("abis_mapping/templates/survey_metadata_v3/examples/minimal_error_missing_datum.csv"), + expected=None, + ), + ], + metadata_sampling_type="systematic survey", + allows_extra_cols=True, + ), + TemplateTestParameters( + template_id="survey_occurrence_data-v3.0.0.csv", + empty_template=pathlib.Path("abis_mapping/templates/survey_occurrence_data_v3/survey_occurrence_data.csv"), mapping_cases=[ MappingParameters( data=pathlib.Path( ( - "abis_mapping/templates/incidental_occurrence_data_v2/examples/" - "margaret_river_flora/margaret_river_flora.csv" + "abis_mapping/templates/survey_occurrence_data_v3/examples" + "/margaret_river_flora/margaret_river_flora.csv" ) ), expected=pathlib.Path( ( - "abis_mapping/templates/incidental_occurrence_data_v2/examples/" - "margaret_river_flora/margaret_river_flora.ttl" + "abis_mapping/templates/survey_occurrence_data_v3/examples" + "/margaret_river_flora/margaret_river_flora.ttl" ) ), + shacl=[ + pathlib.Path("abis_mapping/base/validators/shapes.ttl"), + pathlib.Path("abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl"), + ], + ), + MappingParameters( + scenario_name="organism_qty", + should_validate=True, + data=pathlib.Path( + "abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.csv", + ), + expected=pathlib.Path( + "abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl", + ), ), ], - metadata_sampling_type="incidental", + metadata_sampling_type="systematic survey", allows_extra_cols=True, chunking_parameters=[ ChunkingParameters( data=pathlib.Path( ( - "abis_mapping/templates/incidental_occurrence_data_v2/examples/" + "abis_mapping/templates/survey_occurrence_data_v3/examples/" "margaret_river_flora/margaret_river_flora.csv" ) ), @@ -341,6 +308,71 @@ class TemplateTestParameters: ), ], ), + TemplateTestParameters( + template_id="survey_site_data-v3.0.0.csv", + 
empty_template=pathlib.Path( + "abis_mapping/templates/survey_site_data_v3/survey_site_data.csv", + ), + mapping_cases=[ + MappingParameters( + data=pathlib.Path("abis_mapping/templates/survey_site_data_v3/examples/minimal.csv"), + expected=pathlib.Path("abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl"), + ), + MappingParameters( + scenario_name="missing_relatedSiteID_and_datum", + data=pathlib.Path( + "abis_mapping/templates/survey_site_data_v3/examples/minimal-error-missing-fields.csv" + ), + expected=None, + should_validate=False, + expected_error_codes={"row-constraint"}, + ), + MappingParameters( + scenario_name="duplicate-site-ids", + data=pathlib.Path( + "abis_mapping/templates/survey_site_data_v3/examples/minimal-error-duplicate-site-ids.csv" + ), + expected=None, + should_validate=False, + expected_error_codes={"unique-error", "primary-key"}, + ), + ], + metadata_sampling_type="systematic survey", + allows_extra_cols=True, + ), + TemplateTestParameters( + template_id="survey_site_visit_data-v3.0.0.csv", + empty_template=pathlib.Path( + "abis_mapping/templates/survey_site_visit_data_v3/survey_site_visit_data.csv", + ), + mapping_cases=[ + MappingParameters( + data=pathlib.Path("abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv"), + expected=pathlib.Path("abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.ttl"), + ), + MappingParameters( + scenario_name="missing_start_date", + data=pathlib.Path( + "abis_mapping/templates/survey_site_visit_data_v3/examples/minimal-error-no-dates.csv" + ), + expected=None, + should_validate=False, + expected_error_codes={"constraint-error"}, + ), + MappingParameters( + scenario_name="dates_in_wrong_order", + data=pathlib.Path( + "abis_mapping/templates/survey_site_visit_data_v3/examples/minimal-error-dates-wrong-order.csv" + ), + expected=None, + should_validate=False, + expected_error_codes={"row-constraint"}, + ), + ], + metadata_sampling_type="systematic survey", + 
allows_extra_cols=True, + ), + # Incidental templates TemplateTestParameters( template_id="incidental_occurrence_data-v3.0.0.csv", empty_template=pathlib.Path(