Skip to content

Commit

Permalink
Merge pull request #31 from ckan/master
Browse files Browse the repository at this point in the history
Update from ckan/ckanext-dcat
  • Loading branch information
mjanez authored Sep 19, 2024
2 parents aa26724 + f18ba7a commit 2302ff8
Show file tree
Hide file tree
Showing 18 changed files with 159 additions and 76 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## [Unreleased](https://github.com/ckan/ckanext-dcat/compare/v2.0.0...HEAD)

* Add support for hydra collection type PartialCollectionView
* Fix DCAT date validator on empty values ([#297](https://github.com/ckan/ckanext-dcat/pull/297))
* Add support for hydra collection type PartialCollectionView ([#299](https://github.com/ckan/ckanext-dcat/pull/299))

## [v2.0.0](https://github.com/ckan/ckanext-dcat/compare/v1.7.0...v2.0.0) - 2024-08-30

Expand Down Expand Up @@ -117,7 +118,7 @@

## [v1.1.0](https://github.com/ckan/ckanext-dcat/compare/v1.0.0...v1.1.0) - 2020-03-12

* Python 3 support and new pytest based test suite ([#174](https://github.com/ckan/ckanext-dcat/pull/174))
* Python 3 support and new pytest based test suite ([#174](https://github.com/ckan/ckanext-dcat/pull/174))
* Fix `after_show - set_titles` in plugins.py ([#172](https://github.com/ckan/ckanext-dcat/pull/172))
* Add support for DCT.rightsStatement in DCT.accessRights and DCT.rights ([#177](https://github.com/ckan/ckanext-dcat/pull/177))
* Add support for additional vcard representations ([#178](https://github.com/ckan/ckanext-dcat/pull/178))
Expand Down
10 changes: 9 additions & 1 deletion ckanext/dcat/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,15 @@ def dcat_to_ckan(dcat_dict):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher})
elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
package_dict['extras'].append({'key': 'dcat_publisher_name', 'value': dcat_publisher.get('name')})
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

if dcat_publisher.get('mbox'):
package_dict['extras'].append({'key': 'dcat_publisher_email', 'value': dcat_publisher.get('mbox')})

if dcat_publisher.get('identifier'):
package_dict['extras'].append({
'key': 'dcat_publisher_id',
'value': dcat_publisher.get('identifier') # This could be a URI like https://ror.org/05wg1m734
})

package_dict['extras'].append({
'key': 'language',
Expand Down
3 changes: 2 additions & 1 deletion ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,8 @@ def _get_from_extra(key):
('name', Literal, FOAF.name, True,),
('email', Literal, FOAF.mbox, False,),
('url', URIRef, FOAF.homepage,False,),
('type', Literal, DCT.type, False,))
('type', Literal, DCT.type, False,),
('identifier', URIRef, DCT.identifier, False,))

_pub = _get_from_extra('source_catalog_publisher')
if _pub:
Expand Down
2 changes: 2 additions & 0 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,8 @@ def _publisher(self, subject, predicate):

publisher["type"] = self._object_value(agent, DCT.type)

publisher['identifier'] = self._object_value(agent, DCT.identifier)

return publisher

def _contact_details(self, subject, predicate):
Expand Down
5 changes: 4 additions & 1 deletion ckanext/dcat/profiles/euro_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref):

# Publisher
publisher = self._publisher(dataset_ref, DCT.publisher)
for key in ("uri", "name", "email", "url", "type"):
for key in ("uri", "name", "email", "url", "type", "identifier"):
if publisher.get(key):
dataset_dict["extras"].append(
{"key": "publisher_{0}".format(key), "value": publisher.get(key)}
Expand Down Expand Up @@ -372,6 +372,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
"email": self._get_dataset_value(dataset_dict, "publisher_email"),
"url": self._get_dataset_value(dataset_dict, "publisher_url"),
"type": self._get_dataset_value(dataset_dict, "publisher_type"),
"identifier": self._get_dataset_value(dataset_dict, "publisher_identifier"),
}
elif dataset_dict.get("organization"):
# Fall back to dataset org
Expand All @@ -396,6 +397,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
"email": org_dict.get("email"),
"url": org_dict.get("url"),
"type": org_dict.get("dcat_type"),
"identifier": org_dict.get("identifier"),
}
# Add to graph
if publisher_ref:
Expand All @@ -406,6 +408,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
("email", FOAF.mbox, None, Literal),
("url", FOAF.homepage, None, URIRef),
("type", DCT.type, None, URIRefOrLiteral),
("identifier", DCT.identifier, None, URIRefOrLiteral),
]
self._add_triples_from_dict(publisher_details, publisher_ref, items)

Expand Down
7 changes: 7 additions & 0 deletions ckanext/dcat/profiles/euro_dcat_ap_scheming.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,13 @@ def _not_empty_dict(data_dict):
_type=URIRef,
value_modifier=self._add_mailto,
)
self._add_triple_from_dict(
publisher,
publisher_ref,
DCT.identifier,
"identifier",
_type=URIRefOrLiteral
)

temporal = dataset_dict.get("temporal_coverage")
if (
Expand Down
4 changes: 4 additions & 0 deletions ckanext/dcat/profiles/schemaorg.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ def _publisher_graph(self, dataset_ref, dataset_dict):

self._add_triples_from_dict(dataset_dict, contact_point, items)

publisher_identifier = self._get_dataset_value(dataset_dict, "publisher_identifier")
if publisher_identifier:
self.g.add((publisher_details, SCHEMA.identifier, Literal(publisher_identifier)))

def _temporal_graph(self, dataset_ref, dataset_dict):
start = self._get_dataset_value(dataset_dict, "temporal_start")
end = self._get_dataset_value(dataset_dict, "temporal_end")
Expand Down
4 changes: 4 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ dataset_fields:

- field_name: type
label: Type

- field_name: identifier
label: Identifier
help_text: Unique identifier for the publisher, such as a ROR ID.
help_text: Entity responsible for making the dataset available.

- field_name: license_id
Expand Down
5 changes: 5 additions & 0 deletions ckanext/dcat/schemas/dcat_ap_recommended.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ dataset_fields:

- field_name: type
label: Type

- field_name: identifier
label: Identifier
help_text: Unique identifier for the publisher, such as a ROR ID.

help_text: Entity responsible for making the dataset available.

- field_name: license_id
Expand Down
18 changes: 18 additions & 0 deletions ckanext/dcat/tests/logic/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,27 @@ def test_dcat_date_invalid():
invalid_values = [
"2024+07",
"not_a_date",
True
]

for value in invalid_values:
data = {key: value}
with pytest.raises(Invalid):
dcat_date(key, data, errors, {}), value


def test_dcat_date_empty_values():

key = ("some_date",)
errors = {key: []}
valid_values = [
None,
False,
""
]

for value in valid_values:
data = {key: value}
dcat_date(key, data, errors, {}), value

assert data[key] is None
2 changes: 2 additions & 0 deletions ckanext/dcat/tests/profiles/base/test_base_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,7 @@ def test_publisher_foaf(self):
<foaf:mbox>[email protected]</foaf:mbox>
<foaf:homepage>http://some.org</foaf:homepage>
<dct:type rdf:resource="http://purl.org/adms/publishertype/NonProfitOrganisation"/>
<dct:identifier rdf:resource="https://ror.org/05wg1m734"/>
</foaf:Organization>
</dct:publisher>
</rdfs:SomeClass>
Expand All @@ -666,6 +667,7 @@ def test_publisher_foaf(self):
assert publisher['email'] == '[email protected]'
assert publisher['url'] == 'http://some.org'
assert publisher['type'] == 'http://purl.org/adms/publishertype/NonProfitOrganisation'
assert publisher['identifier'] == 'https://ror.org/05wg1m734'

def test_publisher_ref(self):

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ def _get_extra_value_as_list(key):
assert _get_extra_value('publisher_email') == '[email protected]'
assert _get_extra_value('publisher_url') == 'http://some.org'
assert _get_extra_value('publisher_type') == 'http://purl.org/adms/publishertype/NonProfitOrganisation'
assert _get_extra_value('publisher_identifier') == 'https://ror.org/05wg1m734'
assert _get_extra_value('contact_name') == 'Point of Contact'
# mailto gets removed for storage and is added again on output
assert _get_extra_value('contact_email') == '[email protected]'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def test_e2e_ckan_to_dcat(self):
"email": "[email protected]",
"url": "https://example.org",
"type": "public_body",
"identifier": "http://example.org/publisher-id",
},
],
"temporal_coverage": [
Expand Down Expand Up @@ -301,6 +302,12 @@ def test_e2e_ckan_to_dcat(self):
DCT.type,
dataset_dict["publisher"][0]["type"],
)
assert self._triple(
g,
publisher[0][2],
DCT.identifier,
URIRef(dataset_dict["publisher"][0]["identifier"])
)

temporal = [t for t in g.triples((dataset_ref, DCT.temporal, None))]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def test_publisher_extras(self):
{'key': 'publisher_email', 'value': '[email protected]'},
{'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
{'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
{'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'},
]


Expand All @@ -121,6 +122,7 @@ def test_publisher_extras(self):
assert str(publisher) == extras['publisher_uri']
assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name'])
assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier'])

contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2]
assert contact_point
Expand All @@ -144,6 +146,7 @@ def test_publisher_no_uri(self):
{'key': 'publisher_email', 'value': '[email protected]'},
{'key': 'publisher_url', 'value': 'http://example.com/publisher/home'},
{'key': 'publisher_type', 'value': 'http://purl.org/adms/publishertype/Company'},
{'key': 'publisher_identifier', 'value': 'https://ror.org/05wg1m734'},
]
}
extras = self._extras(dataset)
Expand All @@ -158,6 +161,7 @@ def test_publisher_no_uri(self):
assert isinstance(publisher, BNode)
assert self._triple(g, publisher, RDF.type, SCHEMA.Organization)
assert self._triple(g, publisher, SCHEMA.name, extras['publisher_name'])
assert self._triple(g, publisher, SCHEMA.identifier, extras['publisher_identifier'])

contact_point = self._triple(g, publisher, SCHEMA.contactPoint, None)[2]
assert contact_point
Expand Down
1 change: 1 addition & 0 deletions ckanext/dcat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def field_labels():
'publisher_email': _('Publisher email'),
'publisher_url': _('Publisher URL'),
'publisher_type': _('Publisher type'),
'publisher_identifier': _('Publisher identifier'),
'contact_name': _('Contact name'),
'contact_email': _('Contact email'),
'contact_uri': _('Contact URI'),
Expand Down
19 changes: 16 additions & 3 deletions ckanext/dcat/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@
Invalid,
_,
)
from ckanext.scheming.validation import scheming_validator

try:
from ckanext.scheming.validation import scheming_validator
except ImportError:
def scheming_validator(func):
return func


# https://www.w3.org/TR/xmlschema11-2/#gYear
regexp_xsd_year = re.compile(
Expand Down Expand Up @@ -41,12 +47,19 @@ def is_date(value):
def dcat_date(key, data, errors, context):
value = data[key]

if isinstance(value, datetime.datetime):
if not value:
data[key] = None
return

if is_year(value) or is_year_month(value) or is_date(value):
if isinstance(value, datetime.datetime):
return

try:
if is_year(value) or is_year_month(value) or is_date(value):
return
except TypeError:
raise Invalid(_("Dates must be provided as strings or datetime objects"))

try:
parse_date(value)
except ValueError:
Expand Down
Loading

0 comments on commit 2302ff8

Please sign in to comment.