diff --git a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py index 8c7f18d6..a48ad075 100644 --- a/abis_mapping/templates/survey_occurrence_data_v2/mapping.py +++ b/abis_mapping/templates/survey_occurrence_data_v2/mapping.py @@ -542,7 +542,7 @@ def apply_mapping_row( # Create Tern Site IRI, depending on the siteID field site_id: str | None = row["siteID"] if site_id: - site = utils.iri_patterns.site_iri(base_iri, site_id) + site = utils.iri_patterns.legacy_site_iri(base_iri, site_id) else: site = None diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv index 92d7c5c3..fd97338d 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.csv @@ -13,5 +13,5 @@ providerRecordID,providerRecordIDSource,locality,decimalLatitude,decimalLongitud 12,WAM,,-33.8,115.21,WGS84,,,26/09/2019,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Caladenia excelsa,,,,Plantae,,,,,,,,,,,,, 13,WAM,Cowaramup Bay Road,-33.86,115.02,WGS84,,,26/09/2019,,,PreservedSpecimen,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,C01,CC123,WAM,,,,,,,,,Stream Environment and Water Pty Ltd,,Caladenia excelsa,,,,Plantae,,,,,,,,,,,,, 14,WAM,Cowaramup Bay Road,-33.86,115.02,WGS84,20,Coordinates rounded to the nearest 10 km for conservation concern,26/09/2019,,,HumanObservation,Stream Environment and Water Pty Ltd,,,,,,,,,,,,,,C01,CC456,WAM,,,,,,,Caladenia ?excelsa,,Stream Environment and Water Pty Ltd,,Caladenia excelsa,?,Could not confirm the ID due to damaged flower,,Plantae,,,,,,,,,,,,, -8022FSJMJ079c5cf,WAM,Cowaramup Bay Road,-33.8,115.21,WGS84,50,Coordinates rounded to the nearest 10 km for conservation 
concern,26/09/2019,,human observation,PreservedSpecimen,Stream Environment and Water Pty Ltd,PE:12:8832,present,"Closed forest of Melaleuca lanceolata. White, grey or brown sand, sandy loam.",native,Dried out leaf tips,2,,,adult,male,No breeding evident,MR-456,Stream Environment and Water Pty Ltd,32237,ARACH,WAM,BHP2012-7521 | M12378,BHP,Wet (in ethanol or some other preservative),26/09/2019,https://www.ncbi.nlm.nih.gov/nuccore/MH040669.1 | https://www.ncbi.nlm.nih.gov/nuccore/MH040616.1,Sanger dideoxy sequencing,Caladenia ?excelsa,2019-09-27T12:34+08:00,Stream Environment and Water Pty Ltd,Visually identified in the field (sighting),Caladenia excelsa,species incerta,no flowers present,Caladenia excelsa Hopper & A.P.Br.,Plantae,species,VU,WA,Check against Threatened and Priority Fauna List WA available from https://www.dpaw.wa.gov.au/plants-and-animals/threatened-species-and-communities/threatened-animals. Last updated 13 June 2022,,WA-BIO,Category 1,Department of Biodiversity and Conservation,MR-R1,MR-S1,WAM,,MR-R1-V1 +8022FSJMJ079c5cf,WAM,Cowaramup Bay Road,-33.8,115.21,WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern,26/09/2019,,human observation,PreservedSpecimen,Stream Environment and Water Pty Ltd,PE:12:8832,present,"Closed forest of Melaleuca lanceolata. 
White, grey or brown sand, sandy loam.",native,Dried out leaf tips,2,,,adult,male,No breeding evident,MR-456,Stream Environment and Water Pty Ltd,32237,ARACH,WAM,BHP2012-7521 | M12378,BHP,Wet (in ethanol or some other preservative),26/09/2019,https://www.ncbi.nlm.nih.gov/nuccore/MH040669.1 | https://www.ncbi.nlm.nih.gov/nuccore/MH040616.1,Sanger dideoxy sequencing,Caladenia ?excelsa,2019-09-27T12:34+08:00,Stream Environment and Water Pty Ltd,Visually identified in the field (sighting),Caladenia excelsa,species incerta,no flowers present,Caladenia excelsa Hopper & A.P.Br.,Plantae,species,VU,WA,Check against Threatened and Priority Fauna List WA available from https://www.dpaw.wa.gov.au/plants-and-animals/threatened-species-and-communities/threatened-animals. Last updated 13 June 2022,,WA-BIO,Category 1,Department of Biodiversity and Conservation,MR-R1,MR-S1,WAM,https://example.com/site/WAM-MR-S1,MR-R1-V1 ABC123,WAM,Cowaramup Bay Road,-33.8,115.21,WGS84,30,Coordinates generalised,26/09/2019,,new sampling protocol,new basis of record,Stream Environment and Water Pty Ltd,PE:12:8833,new occurrence status,new habitat,new establishment means,Leaves brown,6,,,new life stage,new sex,new reproductiveCondition,MR-457,Stream Environment and Water Pty Ltd,32238,ARACH,WAM,BHP2012-7522 | M12379,BHP,new preparations,27/09/2019,https://www.ncbi.nlm.nih.gov/nuccore/MH040669.1 | https://www.ncbi.nlm.nih.gov/nuccore/MH040616.1,new sequencing method,Caladenia ?excelsa,2019-09-27T12:34+08:00,Stream Environment and Water Pty Ltd,new identification method,Caladenia excelsa,new identification qualifier,new remarks,Caladenia excelsa Hopper & A.P.Br.,new kingdom,new taxon rank,new threat status,WA,a random selection,,,Category 1,Department of Biodiversity and Conservation,MR-R1,MR-S1,WAM,, diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl 
b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl index 6169f338..2afd1820 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/margaret_river_flora/margaret_river_flora.ttl @@ -770,6 +770,16 @@ skos:prefLabel "Stream Environment and Water Pty Ltd recordNumber" ; prov:wasAttributedTo . + a rdfs:Datatype ; + skos:definition "An identifier for the site" ; + skos:prefLabel "WAM Site ID" ; + prov:qualifiedAttribution . + + a tern:FeatureOfInterest, + tern:Site ; + schema:identifier "MR-S1"^^ ; + tern:featureType . + a tern:SiteVisit ; schema:isPartOf . @@ -897,10 +907,6 @@ prov:agent ; prov:hadRole . - a prov:Attribution ; - prov:agent ; - prov:hadRole . - a skos:Concept ; skos:broader ; skos:definition "A type of basisOfRecord." ; @@ -1309,14 +1315,17 @@ a prov:Agent ; schema:name "WA-BIO" . - a tern:FeatureOfInterest, + a tern:FeatureOfInterest, tern:Site ; - schema:isPartOf ; tern:featureType . a tern:Survey ; schema:isPartOf . + a prov:Attribution ; + prov:agent ; + prov:hadRole . + a skos:Concept ; skos:broader ; skos:definition "A type of identificationMethod." 
; @@ -1689,7 +1698,7 @@ dwc:catalogNumber "ARACH"^^ ; dwc:collectionCode "32237" ; prov:wasAssociatedWith ; - sosa:isSampleOf ; + sosa:isSampleOf ; sosa:usedProcedure ; schema:identifier "BHP2012-7521"^^, "M12378"^^, @@ -1710,7 +1719,7 @@ dwc:catalogNumber "ARACH"^^ ; dwc:collectionCode "32238" ; prov:wasAssociatedWith ; - sosa:isSampleOf ; + sosa:isSampleOf ; sosa:usedProcedure ; schema:identifier "BHP2012-7522"^^, "M12379"^^, diff --git a/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl b/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl index a742f61d..ea37967c 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v3/examples/organism_qty.ttl @@ -46,11 +46,6 @@ skos:prefLabel "Gaia Resources recordID" ; prov:qualifiedAttribution . - a tern:FeatureOfInterest, - tern:Site ; - schema:isPartOf ; - tern:featureType . - a prov:Attribution ; prov:agent ; prov:hadRole . @@ -72,9 +67,13 @@ a prov:Agent ; schema:name "Gaia Resources" . + a tern:FeatureOfInterest, + tern:Site ; + tern:featureType . 
+ a dwc:Occurrence, tern:FeatureOfInterest ; - sosa:isSampleOf ; + sosa:isSampleOf ; sosa:usedProcedure ; schema:isPartOf ; schema:spatial _:Nb0c3d4fa822b88b4d3f8743700000000 ; diff --git a/abis_mapping/templates/survey_occurrence_data_v3/mapping.py b/abis_mapping/templates/survey_occurrence_data_v3/mapping.py index 5b890a34..82b0172e 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/mapping.py +++ b/abis_mapping/templates/survey_occurrence_data_v3/mapping.py @@ -561,13 +561,30 @@ def apply_mapping_row( else: survey = None - # Create Tern Site IRI, depending on the siteID field + # Create Tern Site IRI, depending on the site fields site_id: str | None = row["siteID"] - if site_id: - site = utils.iri_patterns.site_iri(base_iri, site_id) + site_id_source: str | None = row["siteIDSource"] + existing_site_iri: str | None = row["existingBDRSiteIRI"] + if existing_site_iri: + site = rdflib.URIRef(existing_site_iri) + elif site_id and site_id_source: + site = utils.iri_patterns.site_iri(site_id_source, site_id) else: site = None + # When both existingBDRSiteIRI and siteID+siteIDSource are provided, + # the site gets a schema:identifier with this datatype. + if existing_site_iri and site_id and site_id_source: + site_id_datatype = utils.iri_patterns.datatype_iri("siteID", site_id_source) + site_id_datatype_attribution = utils.iri_patterns.attribution_iri( + base_iri, "resourceProvider", site_id_source + ) + site_id_datatype_agent = utils.iri_patterns.agent_iri("org", site_id_source) + else: + site_id_datatype = None + site_id_datatype_attribution = None + site_id_datatype_agent = None + # Conditionally create uri dependent on siteVisitID field. 
site_visit_id: str | None = row["siteVisitID"] if site_visit_id: @@ -1207,7 +1224,26 @@ def apply_mapping_row( # Add site self.add_site( uri=site, - dataset=dataset, + site_id=site_id, + site_id_datatype=site_id_datatype, + graph=graph, + ) + + self.add_site_id_datatype( + uri=site_id_datatype, + site_id_source=site_id_source, + site_id_datatype_attribution=site_id_datatype_attribution, + graph=graph, + ) + self.add_attribution( + uri=site_id_datatype_attribution, + provider=site_id_datatype_agent, + provider_role_type=DATA_ROLE_RESOURCE_PROVIDER, + graph=graph, + ) + self.add_site_id_datatype_agent( + uri=site_id_datatype_agent, + site_id_source=site_id_source, graph=graph, ) @@ -4116,15 +4152,18 @@ def add_organism_quantity_value( def add_site( self, + *, uri: rdflib.URIRef | None, - dataset: rdflib.URIRef, + site_id: str | None, + site_id_datatype: rdflib.URIRef | None, graph: rdflib.Graph, ) -> None: """Adds site to the graph. Args: uri (rdflib.URIRef | None): URI to use if site provided else None. - dataset (rdflib.URIRef): The dataset which the data belongs. + site_id: Value of siteID field from the Row. + site_id_datatype: Datatype to use for the site id literal. graph (rdflib.URIRef): Graph to be modified. """ # Check site uri exists @@ -4134,9 +4173,64 @@ def add_site( # Add site information to graph graph.add((uri, a, utils.namespaces.TERN.Site)) graph.add((uri, a, utils.namespaces.TERN.FeatureOfInterest)) - graph.add((uri, rdflib.SDO.isPartOf, dataset)) graph.add((uri, utils.namespaces.TERN.featureType, vocabs.site_type.SITE.iri)) + if site_id_datatype is not None and site_id: + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(site_id, datatype=site_id_datatype))) + + def add_site_id_datatype( + self, + *, + uri: rdflib.URIRef | None, + site_id_source: str | None, + site_id_datatype_attribution: rdflib.URIRef | None, + graph: rdflib.Graph, + ) -> None: + """Adds site id datatype to the graph. + + Args: + uri: Subject of the node. 
+ site_id_source: The siteIDSource value from the row. + site_id_datatype_attribution: The datatype attribution node. + graph: Graph to be modified. + """ + # Check subject was provided + if uri is None: + return + # Add type + graph.add((uri, a, rdflib.RDFS.Datatype)) + # Add definition + graph.add((uri, rdflib.SKOS.definition, rdflib.Literal("An identifier for the site"))) + # Add label + if site_id_source: + graph.add((uri, rdflib.SKOS.prefLabel, rdflib.Literal(f"{site_id_source} Site ID"))) + # Add attribution link + if site_id_datatype_attribution: + graph.add((uri, rdflib.PROV.qualifiedAttribution, site_id_datatype_attribution)) + + def add_site_id_datatype_agent( + self, + *, + uri: rdflib.URIRef | None, + site_id_source: str | None, + graph: rdflib.Graph, + ) -> None: + """Adds the site id datatype agent to the graph. + + Args: + uri: Subject of the node. + site_id_source: The siteIDSource value from the row. + graph: Graph to be modified. + """ + # Check subject provided + if uri is None: + return + # Add type + graph.add((uri, a, rdflib.PROV.Agent)) + # Add name + if site_id_source: + graph.add((uri, rdflib.SDO.name, rdflib.Literal(site_id_source))) + def add_sensitivity_category_attribute( self, uri: rdflib.URIRef | None, diff --git a/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl b/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl index d5f44661..a713bdb9 100644 --- a/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl +++ b/abis_mapping/templates/survey_occurrence_data_v3/validators/validator.ttl @@ -16,6 +16,7 @@ bdrsh:DatatypeShape a rdfs:Class, sh:NodeShape ; sh:targetClass rdfs:Datatype ; sh:xone ( + bdrsh:SiteIDDatatypeShape bdrsh:RecordIDDatatypeShape bdrsh:RecordNumberDatatypeShape bdrsh:CatalogNumberDatatypeShape @@ -92,6 +93,32 @@ bdrsh:RecordIDDatatypeShape sh:class prov:Attribution ; ] . 
+bdrsh:SiteIDDatatypeShape + a sh:NodeShape ; + sh:property [ + sh:path skos:prefLabel ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:pattern " Site ID$" ; + ] ; + sh:property [ + sh:path skos:definition ; + sh:datatype xsd:string ; + sh:nodeKind sh:Literal ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:pattern "^An identifier for the site$" ; + ] ; + sh:property [ + sh:path prov:qualifiedAttribution ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:node bdrsh:AttributionShape ; + sh:class prov:Attribution ; + ] . + bdrsh:AttributionShape a sh:NodeShape ; sh:property [ diff --git a/abis_mapping/templates/survey_site_data_v2/mapping.py b/abis_mapping/templates/survey_site_data_v2/mapping.py index bc762fa0..7f6cfc3e 100644 --- a/abis_mapping/templates/survey_site_data_v2/mapping.py +++ b/abis_mapping/templates/survey_site_data_v2/mapping.py @@ -236,7 +236,7 @@ def apply_mapping_row( # TERN.Site subject IRI - Note this needs to match the iri construction of the # survey site visit and occurrence template mapping, ensuring they will resolve properly. 
site_id: str = row["siteID"] - site = utils.iri_patterns.site_iri(base_iri, site_id) + site = utils.iri_patterns.legacy_site_iri(base_iri, site_id) # Conditionally create uris dependent on siteIDSource site_id_src: str | None = row["siteIDSource"] @@ -258,7 +258,7 @@ def apply_mapping_row( rtor_site_vocab = self.fields()["relationshipToRelatedSite"].get_vocab() if rtor_site_vocab().get(relationship_to_related_site) == rdflib.SDO.isPartOf: # Related site is defined internal to the dataset - related_site = utils.iri_patterns.site_iri(base_iri, related_site_id) + related_site = utils.iri_patterns.legacy_site_iri(base_iri, related_site_id) else: # Related site is defined outside the dataset related_site = utils.rdf.uri_or_string_literal(related_site_id) diff --git a/abis_mapping/templates/survey_site_data_v3/examples/minimal.csv b/abis_mapping/templates/survey_site_data_v3/examples/minimal.csv index a3d74b3e..4c4fdeb8 100644 --- a/abis_mapping/templates/survey_site_data_v3/examples/minimal.csv +++ b/abis_mapping/templates/survey_site_data_v3/examples/minimal.csv @@ -1,5 +1,5 @@ siteID,siteIDSource,existingBDRSiteIRI,siteType,siteName,siteDescription,habitat,relatedSiteID,relationshipToRelatedSite,locality,decimalLatitude,decimalLongitude,footprintWKT,geodeticDatum,coordinateUncertaintyInMeters,dataGeneralizations -P0,WAM,,Site,ParentSite,Footprint of study area,Closed forest,,,Cowaramup Bay Road,,,"POLYGON ((114.98 -33.85, 115.01 -33.85, 115.01 -33.87, 114.98 -33.87, 114.98 -33.85))",WGS84,50, -P1,WAM,,Plot,Plot 1,Fine woody debris.,Closed forest,P0,partOf,Cowaramup Bay Road,-33.85,114.99,"LINESTRING (114.99 -33.85, 115.00 -33.85)",WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern +P0,WAM,https://example.com/site/WAM/P0,Site,ParentSite,Footprint of study area,Closed forest,,,Cowaramup Bay Road,,,"POLYGON ((114.98 -33.85, 115.01 -33.85, 115.01 -33.87, 114.98 -33.87, 114.98 -33.85))",WGS84,50, +,,https://example.com/site/WAM/P1,Plot,Plot 1,Fine 
woody debris.,Closed forest,P0,partOf,Cowaramup Bay Road,-33.85,114.99,"LINESTRING (114.99 -33.85, 115.00 -33.85)",WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern P2,WAM,,Plot,Plot 2,Fine woody debris.,Closed forest,S0,sameAs,Cowaramup Bay Road,-33.85,114.99,"LINESTRING (114.99 -33.85, 115.00 -33.85)",WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern P3,WAM,,Plot,Plot 3,Fine woody debris.,Closed forest,http://example.com/site/S0,sameAs,Cowaramup Bay Road,-33.85,114.99,"LINESTRING (114.99 -33.85, 115.00 -33.85)",WGS84,50,Coordinates rounded to the nearest 10 km for conservation concern diff --git a/abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl b/abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl index 6b70caca..e8433196 100644 --- a/abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl +++ b/abis_mapping/templates/survey_site_data_v3/examples/minimal.ttl @@ -9,18 +9,18 @@ a schema:Collection ; schema:isPartOf ; - schema:member , - , - ; + schema:member , + , + ; schema:name "Site Collection - Data Generalizations - Coordinates rounded to the nearest 10 km for conservation concern" ; tern:hasAttribute . a schema:Collection ; schema:isPartOf ; - schema:member , - , - , - ; + schema:member , + , + , + ; schema:name "Site Collection - Habitat - Closed forest" ; tern:hasAttribute . @@ -57,61 +57,54 @@ a prov:Agent ; schema:name "WAM" . - a tern:Site ; + a tern:Site ; geo:hasGeometry _:N9466fd6e9e4c9aa92b83d28000000000 ; schema:additionalType ; schema:description "Footprint of study area" ; schema:identifier "P0"^^ ; - schema:isPartOf ; schema:name "ParentSite" ; tern:featureType ; tern:locationDescription "Cowaramup Bay Road" . - a tern:Site ; + a tern:Site ; geo:hasGeometry _:N3bba75fe5be4a400a5af80dd00000000, _:N9466fd6e9e4c9aa92b83d28000000001 ; schema:additionalType ; schema:description "Fine woody debris." 
; - schema:identifier "P1"^^ ; - schema:isPartOf , - ; + schema:isPartOf ; schema:name "Plot 1" ; tern:featureType ; tern:locationDescription "Cowaramup Bay Road" . - a tern:Site ; + a tern:Dataset . + + a tern:Site ; geo:hasGeometry _:N3bba75fe5be4a400a5af80dd00000001, _:N9466fd6e9e4c9aa92b83d28000000002 ; schema:additionalType ; schema:description "Fine woody debris." ; - schema:identifier "P2"^^ ; - schema:isPartOf ; schema:name "Plot 2" ; schema:sameAs "S0" ; tern:featureType ; tern:locationDescription "Cowaramup Bay Road" . - a tern:Site ; + a tern:Site ; geo:hasGeometry _:N3bba75fe5be4a400a5af80dd00000002, _:N9466fd6e9e4c9aa92b83d28000000003 ; schema:additionalType ; schema:description "Fine woody debris." ; - schema:identifier "P3"^^ ; - schema:isPartOf ; schema:name "Plot 3" ; schema:sameAs "http://example.com/site/S0"^^xsd:anyURI ; tern:featureType ; tern:locationDescription "Cowaramup Bay Road" . - a tern:Dataset . - [] a rdf:Statement ; geo:hasGeometry [ a geo:Geometry ; geo:asWKT " POLYGON ((-33.85 114.98, -33.85 115.01, -33.87 115.01, -33.87 114.98, -33.85 114.98))"^^geo:wktLiteral ; geo:hasMetricSpatialAccuracy 5e+01 ] ; rdf:object _:N9466fd6e9e4c9aa92b83d28000000000 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . [] a rdf:Statement ; @@ -120,7 +113,7 @@ geo:hasMetricSpatialAccuracy 5e+01 ] ; rdf:object _:N9466fd6e9e4c9aa92b83d28000000001 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . [] a rdf:Statement ; @@ -129,7 +122,7 @@ geo:hasMetricSpatialAccuracy 5e+01 ] ; rdf:object _:N9466fd6e9e4c9aa92b83d28000000002 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . [] a rdf:Statement ; @@ -138,7 +131,7 @@ geo:hasMetricSpatialAccuracy 5e+01 ] ; rdf:object _:N9466fd6e9e4c9aa92b83d28000000003 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . 
[] a rdf:Statement ; @@ -147,7 +140,7 @@ geo:hasMetricSpatialAccuracy 5e+01 ] ; rdf:object _:N3bba75fe5be4a400a5af80dd00000000 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . [] a rdf:Statement ; @@ -156,7 +149,7 @@ geo:hasMetricSpatialAccuracy 5e+01 ] ; rdf:object _:N3bba75fe5be4a400a5af80dd00000001 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . [] a rdf:Statement ; @@ -165,7 +158,7 @@ geo:hasMetricSpatialAccuracy 5e+01 ] ; rdf:object _:N3bba75fe5be4a400a5af80dd00000002 ; rdf:predicate geo:hasGeometry ; - rdf:subject ; + rdf:subject ; rdfs:comment "supplied as" . _:N3bba75fe5be4a400a5af80dd00000000 a geo:Geometry ; diff --git a/abis_mapping/templates/survey_site_data_v3/mapping.py b/abis_mapping/templates/survey_site_data_v3/mapping.py index 7f96d042..130dfed7 100644 --- a/abis_mapping/templates/survey_site_data_v3/mapping.py +++ b/abis_mapping/templates/survey_site_data_v3/mapping.py @@ -251,12 +251,20 @@ def apply_mapping_row( """ # TERN.Site subject IRI - Note this needs to match the iri construction of the # survey site visit and occurrence template mapping, ensuring they will resolve properly. + # If existingBDRSiteIRI is specified, just use that as-is for the IRI. site_id: str | None = row["siteID"] - site = utils.iri_patterns.site_iri(base_iri, site_id) # type: ignore[arg-type] # TODO fix when doing mapping - - # Conditionally create uris dependent on siteIDSource site_id_src: str | None = row["siteIDSource"] - if site_id_src: + existing_site_iri: str | None = row["existingBDRSiteIRI"] + if existing_site_iri: + site = rdflib.URIRef(existing_site_iri) + elif site_id and site_id_src: + site = utils.iri_patterns.site_iri(site_id_src, site_id) + else: + raise ValueError("Invalid row missing SiteID and existingBDRSiteIRI") + + # When both existingBDRSiteIRI and siteID+siteIDSource are provided, + # the site gets a schema:identifier with this datatype. 
+ if existing_site_iri and site_id and site_id_src: site_id_datatype = utils.iri_patterns.datatype_iri("siteID", site_id_src) site_id_agent = utils.iri_patterns.agent_iri("org", site_id_src) site_id_attribution = utils.iri_patterns.attribution_iri(base_iri, "resourceProvider", site_id_src) @@ -274,7 +282,7 @@ def apply_mapping_row( rtor_site_vocab = self.fields()["relationshipToRelatedSite"].get_vocab() if rtor_site_vocab().get(relationship_to_related_site) == rdflib.SDO.isPartOf: # Related site is defined internal to the dataset - related_site = utils.iri_patterns.site_iri(base_iri, related_site_id) + related_site = utils.iri_patterns.legacy_site_iri(base_iri, related_site_id) else: # Related site is defined outside the dataset related_site = utils.rdf.uri_or_string_literal(related_site_id) @@ -446,7 +454,6 @@ def add_site( base_iri: Namespace used to construct IRIs """ # Extract relevant values - site_id: str | None = row["siteID"] site_name = row["siteName"] site_type = row["siteType"] site_description = row["siteDescription"] @@ -455,12 +462,11 @@ def add_site( # Add type graph.add((uri, a, utils.namespaces.TERN.Site)) - # Add dataset - graph.add((uri, rdflib.SDO.isPartOf, dataset)) - - # Add siteID - dt = site_id_datatype if site_id_datatype is not None else rdflib.XSD.string - graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(site_id, datatype=dt))) + # Add siteID schema:identifier property, only when both existingBDRSiteIRI + # and siteID+siteIDSource are provided. 
+ site_id: str | None = row["siteID"] + if site_id and site_id_datatype is not None: + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(site_id, datatype=site_id_datatype))) # Add related site if provided if related_site is not None and (relationship_to_related_site := row["relationshipToRelatedSite"]): diff --git a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py index d2c46610..ce0b9934 100644 --- a/abis_mapping/templates/survey_site_visit_data_v2/mapping.py +++ b/abis_mapping/templates/survey_site_visit_data_v2/mapping.py @@ -237,7 +237,7 @@ def apply_mapping_row( # TERN.Site subject IRI - Note this needs to match the iri construction of the # survey site and occurrence template mapping, ensuring they will resolve properly. - uri_site = utils.iri_patterns.site_iri(base_iri, row_site_id) + uri_site = utils.iri_patterns.legacy_site_iri(base_iri, row_site_id) # Create TERN survey IRI from surveyID field row_survey_id: str | None = row["surveyID"] diff --git a/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv index 54e16c72..47d62d40 100644 --- a/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv +++ b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.csv @@ -1,4 +1,4 @@ surveyID,siteID,siteIDSource,existingBDRSiteIRI,siteVisitID,siteVisitStart,siteVisitEnd,visitOrgs,visitObservers,condition,targetTaxonomicScope,protocolName,protocolDescription,samplingEffortValue,samplingEffortUnit -TIS-24-03,P1,WAM,,TIS-24-03-P1-01,2024-03-12,2024-04-04,WAM | DBCA,ORCID00001 | ORCID00002,dry,new_taxon,wet pitfall trap,10 x square buckets of size 20 x 20 x 15 cm. Propylene glycol.,240,trap nights -TIS-24-03,P1,WAM,,TIS-24-03-P1-02,2024-03-12,2024-03-12,WAM,ORCID00001,moist leaf litter after recent rain,invertebrate,litter sifting,50 cm diameter sifter with 5 mm mesh. 
Litter samles taken ~1 metre from each pitfall trap,10,sifts -TIS-24-03,P1,WAM,,TIS-24-03-P1-03,2024-03-12,,WAM,ORCID00003,,bird,human observation,,, +TIS-24-03,P1,WAM,https://example.com/site/P1,TIS-24-03-P1-01,2024-03-12,2024-04-04,WAM | DBCA,ORCID00001 | ORCID00002,dry,new_taxon,wet pitfall trap,10 x square buckets of size 20 x 20 x 15 cm. Propylene glycol.,240,trap nights +TIS-24-03,,,https://example.com/site/P2,TIS-24-03-P2-01,2024-03-12,2024-03-12,WAM,ORCID00001,moist leaf litter after recent rain,invertebrate,litter sifting,50 cm diameter sifter with 5 mm mesh. Litter samles taken ~1 metre from each pitfall trap,10,sifts +TIS-24-03,P3,WAM,,TIS-24-03-P3-01,2024-03-12,,WAM,ORCID00003,,bird,human observation,,, diff --git a/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.ttl b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.ttl index 2b840065..b5ce687c 100644 --- a/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.ttl +++ b/abis_mapping/templates/survey_site_visit_data_v3/examples/minimal.ttl @@ -10,7 +10,7 @@ a schema:Collection ; schema:isPartOf ; - schema:member ; + schema:member ; schema:name "Site Visit Collection - Sampling Effort - 10 sifts" ; tern:hasAttribute . @@ -22,13 +22,13 @@ a schema:Collection ; schema:isPartOf ; - schema:member ; + schema:member ; schema:name "Site Visit Collection - Target Taxonomic Scope - bird" ; tern:hasAttribute . a schema:Collection ; schema:isPartOf ; - schema:member ; + schema:member ; schema:name "Site Visit Collection - Target Taxonomic Scope - invertebrate" ; tern:hasAttribute . @@ -43,17 +43,22 @@ skos:prefLabel "WAM Site ID" ; prov:qualifiedAttribution . - a tern:SiteVisit ; + a tern:Site ; + schema:identifier "P1"^^ . + + a tern:Site . 
+ + a tern:SiteVisit ; time:hasTime [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; time:inXSDDate "2024-03-12"^^xsd:date ] ] ; - prov:hadPlan ; + prov:hadPlan ; prov:wasAssociatedWith , ; - schema:identifier "TIS-24-03-P1-03" ; + schema:identifier "TIS-24-03-P3-01" ; schema:isPartOf , ; - tern:hasSite . + tern:hasSite . a tern:Attribute ; schema:isPartOf ; @@ -136,12 +141,12 @@ schema:description "10 x square buckets of size 20 x 20 x 15 cm. Propylene glycol." ; schema:isPartOf . - a prov:Plan ; + a prov:Plan ; sosa:usedProcedure ; schema:description "50 cm diameter sifter with 5 mm mesh. Litter samles taken ~1 metre from each pitfall trap" ; schema:isPartOf . - a prov:Plan ; + a prov:Plan ; sosa:usedProcedure ; schema:isPartOf . @@ -157,6 +162,8 @@ prov:Person ; schema:name "ORCID00003" . + a tern:Site . + a tern:SiteVisit ; time:hasTime [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; @@ -171,32 +178,28 @@ schema:identifier "TIS-24-03-P1-01" ; schema:isPartOf , ; - tern:hasSite ; + tern:hasSite ; tern:siteDescription "dry" . - a tern:SiteVisit ; + a tern:SiteVisit ; time:hasTime [ a time:TemporalEntity ; time:hasBeginning [ a time:Instant ; time:inXSDDate "2024-03-12"^^xsd:date ] ; time:hasEnd [ a time:Instant ; time:inXSDDate "2024-03-12"^^xsd:date ] ] ; - prov:hadPlan ; + prov:hadPlan ; prov:wasAssociatedWith , ; - schema:identifier "TIS-24-03-P1-02" ; + schema:identifier "TIS-24-03-P2-01" ; schema:isPartOf , ; - tern:hasSite ; + tern:hasSite ; tern:siteDescription "moist leaf litter after recent rain" . a prov:Agent, prov:Person ; schema:name "ORCID00001" . - a tern:Site ; - schema:identifier "P1"^^ ; - schema:isPartOf . - a tern:Survey ; schema:isPartOf . 
diff --git a/abis_mapping/templates/survey_site_visit_data_v3/mapping.py b/abis_mapping/templates/survey_site_visit_data_v3/mapping.py index 295ac4d0..1e378089 100644 --- a/abis_mapping/templates/survey_site_visit_data_v3/mapping.py +++ b/abis_mapping/templates/survey_site_visit_data_v3/mapping.py @@ -243,12 +243,9 @@ def apply_mapping_row( # variables starting with uri_ are constructed URIs. row_site_visit_id: str | None = row["siteVisitID"] - row_site_id: str | None = row["siteID"] # should always have these mandatory fields, skip if not if not row_site_visit_id: return - if not row_site_id: - return # Part 1: Construct URIs from Row @@ -258,7 +255,16 @@ def apply_mapping_row( # TERN.Site subject IRI - Note this needs to match the iri construction of the # survey site and occurrence template mapping, ensuring they will resolve properly. - uri_site = utils.iri_patterns.site_iri(base_iri, row_site_id) + # If existingBDRSiteIRI is specified, just use that as-is for the IRI. + row_site_id: str | None = row["siteID"] + row_site_id_source: str | None = row["siteIDSource"] + row_existing_site_iri: str | None = row["existingBDRSiteIRI"] + if row_existing_site_iri: + uri_site = rdflib.URIRef(row_existing_site_iri) + elif row_site_id and row_site_id_source: + uri_site = utils.iri_patterns.site_iri(row_site_id_source, row_site_id) + else: + raise ValueError("Invalid row missing SiteID and existingBDRSiteIRI") # Create TERN survey IRI from surveyID field row_survey_id: str = row["surveyID"] @@ -267,9 +273,9 @@ def apply_mapping_row( # URI for the Site Visit Plan uri_site_visit_plan = utils.iri_patterns.plan_iri(base_iri, "visit", row_site_visit_id) - # URIs based on the siteIDSource - row_site_id_source: str | None = row["siteIDSource"] - if row_site_id_source: + # When both existingBDRSiteIRI and siteID+siteIDSource are provided, + # the site gets a schema:identifier with this datatype. 
+ if row_existing_site_iri and row_site_id and row_site_id_source: uri_site_id_datatype = utils.iri_patterns.datatype_iri("siteID", row_site_id_source) uri_site_id_datatype_attribution = utils.iri_patterns.attribution_iri( base_iri, "resourceProvider", row_site_id_source @@ -372,7 +378,6 @@ def apply_mapping_row( self.add_site( uri=uri_site, uri_site_id_datatype=uri_site_id_datatype, - dataset=dataset, row=row, graph=graph, ) @@ -560,7 +565,6 @@ def add_site( *, uri: rdflib.URIRef, uri_site_id_datatype: rdflib.URIRef | None, - dataset: rdflib.URIRef, row: frictionless.Row, graph: rdflib.Graph, ) -> None: @@ -576,12 +580,11 @@ def add_site( # Add class graph.add((uri, a, utils.namespaces.TERN.Site)) - # Add siteID literal - dt = uri_site_id_datatype or rdflib.XSD.string - graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(row["siteID"], datatype=dt))) - - # Add to dataset - graph.add((uri, rdflib.SDO.isPartOf, dataset)) + # Add siteID schema:identifier property, only when both existingBDRSiteIRI + # and siteID+siteIDSource are provided. + row_site_id: str | None = row["siteID"] + if row_site_id and uri_site_id_datatype is not None: + graph.add((uri, rdflib.SDO.identifier, rdflib.Literal(row_site_id, datatype=uri_site_id_datatype))) def add_site_id_datatype( self, diff --git a/abis_mapping/utils/iri_patterns.py b/abis_mapping/utils/iri_patterns.py index 39b21b52..65714f99 100644 --- a/abis_mapping/utils/iri_patterns.py +++ b/abis_mapping/utils/iri_patterns.py @@ -41,13 +41,16 @@ def survey_iri( ) -def site_iri( +# TODO: Remove once SSD v2 is removed. +def legacy_site_iri( base_iri: rdflib.Namespace, site_id: str, /, ) -> rdflib.URIRef: """Get the IRI for the tern:Site node, constructed from the siteID field. + # NOTE this pattern is deprecated, is no longer used from SSD v3 onwards. + This IRI is used in mapping multiple Systematic Survey template, and needs to be the same for all of them. 
@@ -61,6 +64,31 @@ def site_iri( return utils.rdf.uri_quoted(base_iri, "Site/{site_id}", site_id=site_id) +def site_iri( + site_id_source: str, + site_id: str, + /, +) -> rdflib.URIRef: + """Get the IRI for the tern:Site node, constructed from the siteID+siteIDSource fields. + + This IRI is used in mapping multiple Systematic Survey templates, + and needs to be the same for all of them. + + Args: + site_id: The siteID field from the template. + site_id_source: The siteIDSource field from the template. + + Returns: + The IRI for the tern:Site node. + """ + return utils.rdf.uri_quoted( + utils.namespaces.DATASET_BDR, + "site/{site_id_source}/{site_id}", + site_id_source=site_id_source, + site_id=site_id, + ) + + def site_visit_iri( base_iri: rdflib.Namespace, site_visit_id: str,