Skip to content

Commit

Permalink
Add identifier property to Distributions
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed Oct 7, 2024
1 parent 60009fb commit 8239e1b
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 36 deletions.
50 changes: 26 additions & 24 deletions ckanext/dcat/profiles/dcat_us_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,27 +49,29 @@ def graph_from_catalog(self, catalog_dict, catalog_ref):

self._graph_from_catalog_base(catalog_dict, catalog_ref)

def _graph_from_dataset_v3(self, dataset_dict, dataset_ref):
    """
    Apply the DCAT v3 adjustments to the graph after a dataset was added.

    Two things happen here:

    * every ``dcat:byteSize`` triple in the graph whose object is typed as
      ``xsd:decimal`` is replaced by the same triple typed as
      ``xsd:nonNegativeInteger``;
    * for each item read from the ``alternate_identifier`` value of
      ``dataset_dict`` a blank ``adms:Identifier`` node carrying the item as
      ``skos:notation`` is attached to ``dataset_ref``.

    :param dataset_dict: the dataset dict being serialized
    :param dataset_ref: the graph node of the dataset
    """

    # byteSize decimal -> nonNegativeInteger
    # Take a snapshot of the matching triples first: the loop body removes
    # and adds triples with this very predicate, and the graph should not
    # be modified while it is being iterated lazily.
    byte_size_triples = list(self.g.triples((None, DCAT.byteSize, None)))
    for subject, predicate, size in byte_size_triples:
        # `size` instead of `object` so the builtin is not shadowed
        if size and size.datatype == XSD.decimal:
            self.g.remove((subject, predicate, size))

            self.g.add(
                (
                    subject,
                    predicate,
                    Literal(int(size), datatype=XSD.nonNegativeInteger),
                )
            )

    # Other identifiers
    value = self._get_dict_value(dataset_dict, "alternate_identifier")
    if value:
        items = self._read_list_value(value)
        for item in items:
            identifier = BNode()
            self.g.add((dataset_ref, ADMS.identifier, identifier))
            self.g.add((identifier, RDF.type, ADMS.Identifier))
            self.g.add((identifier, SKOS.notation, Literal(item)))
def _parse_dataset_v3_us(self, dataset_dict, dataset_ref):
    """
    Copy the ``dct:identifier`` of each distribution to its resource dict.

    For every distribution of ``dataset_ref`` that has a ``dct:identifier``
    value, the resource dicts in ``dataset_dict["resources"]`` whose
    ``distribution_ref`` equals the distribution reference (as a string)
    get that value stored under the ``identifier`` key.

    :param dataset_dict: the dataset dict being built, modified in place
    :param dataset_ref: the graph node of the dataset
    """
    resources = dataset_dict.get("resources", [])

    for distribution_ref in self._distributions(dataset_ref):

        # Distribution identifier
        value = self._object_value(distribution_ref, DCT.identifier)
        if not value:
            continue

        # Loop invariant, so compute it once per distribution
        distribution_key = str(distribution_ref)
        for resource_dict in resources:
            # .get() rather than [...]: a resource without a
            # "distribution_ref" key cannot match any distribution and is
            # skipped instead of aborting the whole parse with a KeyError.
            if resource_dict.get("distribution_ref") == distribution_key:
                resource_dict["identifier"] = value

def _graph_from_dataset_v3_us(self, dataset_dict, dataset_ref):
    """
    Add the DCAT-US v3 specific triples for the resources of a dataset.

    For each resource in ``dataset_dict["resources"]`` the distribution
    reference is rebuilt from the resource URI and a ``dct:identifier``
    triple is requested for it, reading the ``identifier`` key of the
    resource and passing ``guid`` and ``id`` as fallback keys.

    :param dataset_dict: the dataset dict being serialized
    :param dataset_ref: the graph node of the dataset (not used here)
    """
    resources = dataset_dict.get("resources", [])

    for resource_dict in resources:
        uri = resource_uri(resource_dict)
        distribution_ref = CleanedURIRef(uri)

        # Distribution identifier
        self._add_triple_from_dict(
            resource_dict,
            distribution_ref,
            DCT.identifier,
            "identifier",
            fallbacks=["guid", "id"],
            _type=URIRefOrLiteral,
        )
6 changes: 5 additions & 1 deletion ckanext/dcat/schemas/dcat_us_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ dataset_fields:
# Note: CKAN will generate a unique identifier for each dataset
- field_name: identifier
label: Identifier
help_text: A unique identifier of the dataset.
help_text: A unique identifier of the dataset, if not provided it will fall back to CKAN's internal id.

- field_name: frequency
label: Frequency
Expand Down Expand Up @@ -354,6 +354,10 @@ resource_fields:
display_snippet: link.html
help_text: URL that provides a direct link to a downloadable file (defaults to the standard resource URL).

- field_name: identifier
label: Identifier
help_text: A unique identifier of the distribution, if not provided it will fall back to CKAN's internal id.

- field_name: issued
label: Release date
preset: dcat_date
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def test_e2e_dcat_to_ckan(self):

# Resources: standard fields
assert resource["license"] == "http://creativecommons.org/licenses/by-nc/2.0/"
assert resource["identifier"] == "https://example.org/distributions/1"
assert resource["rights"] == "Some statement about rights"
assert resource["issued"] == "2012-05-11"
assert resource["modified"] == "2012-05-01T00:04:06"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@
DCAT_AP_PROFILES = ["dcat_us_3"]


@pytest.mark.usefixtures("with_plugins", "clean_db")
@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
@pytest.mark.ckan_config(
"scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_us_full.yaml"
)
@pytest.mark.ckan_config(
"scheming.presets",
"ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml",
)
@pytest.mark.ckan_config("ckanext.dcat.rdf.profiles", "dcat_us_3")
class TestDCATUS3ProfileSerializeDataset(BaseSerializeTest):
@pytest.mark.usefixtures("with_plugins", "clean_db")
@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets")
@pytest.mark.ckan_config(
"scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_us_full.yaml"
)
@pytest.mark.ckan_config(
"scheming.presets",
"ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml",
)
@pytest.mark.ckan_config("ckanext.dcat.rdf.profiles", "dcat_us_3")
def test_e2e_ckan_to_dcat(self):
"""
Create a dataset using the scheming schema, check that fields
Expand Down Expand Up @@ -82,7 +82,9 @@ def test_e2e_ckan_to_dcat(self):
)
assert self._triple(g, dataset_ref, DCT.type, URIRef(dataset["dcat_type"]))
assert self._triple(g, dataset_ref, ADMS.versionNotes, dataset["version_notes"])
assert self._triple(g, dataset_ref, DCT.accessRights, URIRef(dataset["access_rights"]))
assert self._triple(
g, dataset_ref, DCT.accessRights, URIRef(dataset["access_rights"])
)
assert self._triple(
g,
dataset_ref,
Expand Down Expand Up @@ -332,3 +334,53 @@ def test_e2e_ckan_to_dcat(self):
]
assert endpoint_urls == resource["access_services"][0]["endpoint_url"]

def test_distribution_identifier(self):
    """
    A resource with an explicit ``identifier`` gets it serialized as the
    ``dct:identifier`` of its distribution, as a URI reference.
    """
    resources = [
        {
            "id": "89b67e5b-d0e1-4bc3-a75a-59f21c66ebc0",
            "name": "some data",
            "identifier": "https://example.org/distributions/1",
        }
    ]
    dataset_dict = {
        "name": "test-dcat-us",
        "description": "Test",
        "resources": resources,
    }

    serializer = RDFSerializer(profiles=DCAT_AP_PROFILES)
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset_dict)

    # The third element of the matched triple is the distribution node
    distribution_triple = self._triple(
        graph, dataset_ref, DCAT.distribution, None
    )
    distribution_ref = distribution_triple[2]
    resource = dataset_dict["resources"][0]

    expected = URIRef(resource["identifier"])
    assert self._triple(graph, distribution_ref, DCT.identifier, expected)

def test_distribution_identifier_falls_back_to_id(self):
    """
    A resource without an ``identifier`` gets its ``id`` serialized as the
    ``dct:identifier`` of its distribution.
    """
    resources = [
        {
            "id": "89b67e5b-d0e1-4bc3-a75a-59f21c66ebc0",
            "name": "some data",
        }
    ]
    dataset_dict = {
        "name": "test-dcat-us",
        "description": "Test",
        "resources": resources,
    }

    serializer = RDFSerializer(profiles=DCAT_AP_PROFILES)
    graph = serializer.g

    dataset_ref = serializer.graph_from_dataset(dataset_dict)

    # The third element of the matched triple is the distribution node
    distribution_triple = self._triple(
        graph, dataset_ref, DCAT.distribution, None
    )
    distribution_ref = distribution_triple[2]
    resource = dataset_dict["resources"][0]

    expected = resource["id"]
    assert self._triple(graph, distribution_ref, DCT.identifier, expected)
1 change: 1 addition & 0 deletions examples/dcat/dataset.rdf
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
<dcat:Distribution rdf:about="https://data.some.org/catalog/datasets/9df8df51-63db-37a8-e044-0003ba9b0d98/1">
<dct:title>Some website</dct:title>
<dct:description>A longer description</dct:description>
<dct:identifier>https://example.org/distributions/1</dct:identifier>
<dct:issued rdf:datatype="http://www.w3.org/2001/XMLSchema#date">2012-05-11</dct:issued>
<dct:modified rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2012-05-01T00:04:06</dct:modified>
<dct:license rdf:resource="http://creativecommons.org/licenses/by-nc/2.0/"/>
Expand Down

0 comments on commit 8239e1b

Please sign in to comment.