Skip to content

Commit

Permalink
Update MITAardvark 'rights' field method to support access filter (#114)
Browse files Browse the repository at this point in the history
* Update MITAardvark 'rights' field method to support access filter

Why these changes are being introduced:
* Enable filtering TIMDEX records based on "Access to files" by
providing a static, aggregatable string value indicating the category
for a given record.

How this addresses that need:
* Update MITAardvark.get_rights field method
   * Accept MITAardvark.source as an argument to determine ownership
   * Use "dct_accessRights_s" to determine access category
* Update Rights.kind for MITAardvark.dct_accessRights_s field to 'Access rights'

Side effects of this change:
* Need to determine whether OpenSearch can limit an aggregation on a field to
entries filtered by a subfield (i.e., Rights.kind), and make any necessary updates to timdex-ui.
* timdex-index-manager (TIM) needs to update the mapping of `rights.description` to a multi-field with a `keyword` subfield to enable aggregation.

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/GDT-138
  • Loading branch information
jonavellecuerdo authored Feb 21, 2024
1 parent 06de472 commit c75fd04
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 10 deletions.
2 changes: 1 addition & 1 deletion tests/fixtures/aardvark/aardvark_record_all_fields.jsonl
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"id": "mit:123", "dcat_bbox": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)", "dcat_keyword_sm": ["Country"], "dcat_theme_sm": ["Political boundaries"], "dct_accessRights_s": "Access note", "dct_alternative_sm": ["Alternate title"], "dct_creator_sm": ["Smith, Jane", "Smith, John"], "dct_description_sm": ["A description"], "dct_format_s": "Shapefile", "dct_identifier_sm": ["abc123"], "dct_issued_s": "2003-10-23", "dct_language_sm": ["eng"], "dct_license_sm": ["http://license.license", "http://another_license.another_license"], "dct_publisher_sm": ["ML InfoMap (Firm)"], "dct_references_s": "{\"http://schema.org/downloadUrl\": [{\"label\": \"Source Metadata\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.source.fgdc.xml\"}, {\"label\": \"Aardvark Metadata\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.normalized.aardvark.json\"}, {\"label\": \"Data\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.zip\"}], \"http://schema.org/url\": \"https://geodata.libraries.mit.edu/record/gismit:GISPORTAL_GISOWNER01_BOSTONWATER95\"}", "dct_rights_sm": ["Some person has the rights"], "dct_rightsHolder_sm": ["The person with the rights", "Another person with the rights"], "dct_spatial_sm": ["Some city, Some country"], "dct_subject_sm": ["Geography", "Earth"], "dct_temporal_sm": ["1943", "1979"], "dct_title_s": "Test title 1", "gbl_dateRange_drsim": ["[1943 TO 1946]"], "gbl_displayNote_sm": ["Danger: This text will be displayed in a red box","Info: This text will be displayed in a blue box","Tip: This text will be displayed in a green box","Warning: This text will be displayed in a yellow box","This is text without a tag and it will be assigned default 'note' style"], "gbl_indexYear_im": [1943,1944,1945,1946], "gbl_resourceClass_sm": ["Dataset"], "gbl_resourceType_sm": ["Vector data"], "gbl_suppressed_b": false, "locn_geometry": "ENVELOPE(-111.1, -104.0, 45.0, 
40.9)", "schema_provider_s": "MIT"}
{"id": "mit:123", "dcat_bbox": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)", "dcat_keyword_sm": ["Country"], "dcat_theme_sm": ["Political boundaries"], "dct_accessRights_s": "Access note", "dct_alternative_sm": ["Alternate title"], "dct_creator_sm": ["Smith, Jane", "Smith, John"], "dct_description_sm": ["A description"], "dct_format_s": "Shapefile", "dct_identifier_sm": ["abc123"], "dct_issued_s": "2003-10-23", "dct_language_sm": ["eng"], "dct_license_sm": ["http://license.license", "http://another_license.another_license"], "dct_publisher_sm": ["ML InfoMap (Firm)"], "dct_references_s": "{\"http://schema.org/downloadUrl\": [{\"label\": \"Source Metadata\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.source.fgdc.xml\"}, {\"label\": \"Aardvark Metadata\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.normalized.aardvark.json\"}, {\"label\": \"Data\", \"url\": \"https://cdn.dev1.mitlibrary.net/geo/public/GISPORTAL_GISOWNER01_BOSTONWATER95.zip\"}], \"http://schema.org/url\": \"https://geodata.libraries.mit.edu/record/gismit:GISPORTAL_GISOWNER01_BOSTONWATER95\"}", "dct_rights_sm": ["Some person has the rights"], "dct_rightsHolder_sm": ["The person with the rights", "Another person with the rights"], "dct_spatial_sm": ["Some city, Some country"], "dct_subject_sm": ["Geography", "Earth"], "dct_temporal_sm": ["1943", "1979"], "dct_title_s": "Test title 1", "gbl_dateRange_drsim": ["[1943 TO 1946]"], "gbl_displayNote_sm": ["Danger: This text will be displayed in a red box","Info: This text will be displayed in a blue box","Tip: This text will be displayed in a green box","Warning: This text will be displayed in a yellow box","This is text without a tag and it will be assigned default 'note' style"], "gbl_indexYear_im": [1943,1944,1945,1946], "gbl_resourceClass_sm": ["Dataset"], "gbl_resourceType_sm": ["Vector data"], "gbl_suppressed_b": false, "locn_geometry": "ENVELOPE(-111.1, -104.0, 45.0, 
40.9)", "schema_provider_s": "MIT"}
66 changes: 63 additions & 3 deletions tests/sources/json/test_aardvark.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_aardvark_transform_returns_timdex_record(aardvark_records):
citation="Test title 1. Geospatial data. "
"https://geodata.libraries.mit.edu/record/abc:123",
content_type=["Geospatial data"],
rights=[timdex.Rights(description="Access rights", kind="Access")],
rights=[timdex.Rights(description="Access rights", kind="Access rights")],
links=[
timdex.Link(
url="https://geodata.libraries.mit.edu/record/abc:123",
Expand Down Expand Up @@ -299,8 +299,68 @@ def test_aardvark_get_publication_information_success(aardvark_record_all_fields


def test_aardvark_get_rights_success(aardvark_record_all_fields):
assert MITAardvark.get_rights(next(aardvark_record_all_fields)) == [
timdex.Rights(description="Access note", kind="Access"),
assert MITAardvark.get_rights("source", next(aardvark_record_all_fields)) == [
timdex.Rights(description="Access note", kind="Access rights"),
timdex.Rights(uri="http://license.license"),
timdex.Rights(uri="http://another_license.another_license"),
timdex.Rights(description="Some person has the rights"),
timdex.Rights(
description="The person with the rights. Another person with the rights"
),
]


def test_aardvark_get_rights_mit_restricted_success(aardvark_record_all_fields):
    """A "Restricted" record from the "gismit" source gets "MIT authentication"."""
    record = next(aardvark_record_all_fields)
    record["dct_accessRights_s"] = "Restricted"

    # The "Access to files" entry is expected directly after the raw access
    # rights value and before the license/rights/rights-holder entries.
    expected_rights = [
        timdex.Rights(description="Restricted", kind="Access rights"),
        timdex.Rights(description="MIT authentication", kind="Access to files"),
        timdex.Rights(uri="http://license.license"),
        timdex.Rights(uri="http://another_license.another_license"),
        timdex.Rights(description="Some person has the rights"),
        timdex.Rights(
            description="The person with the rights. Another person with the rights"
        ),
    ]

    assert MITAardvark.get_rights("gismit", record) == expected_rights


def test_aardvark_get_rights_mit_public_success(aardvark_record_all_fields):
    """A "Public" record from the "gismit" source gets "Free/open to all"."""
    record = next(aardvark_record_all_fields)
    record["dct_accessRights_s"] = "Public"

    # The "Access to files" entry is expected directly after the raw access
    # rights value and before the license/rights/rights-holder entries.
    expected_rights = [
        timdex.Rights(description="Public", kind="Access rights"),
        timdex.Rights(description="Free/open to all", kind="Access to files"),
        timdex.Rights(uri="http://license.license"),
        timdex.Rights(uri="http://another_license.another_license"),
        timdex.Rights(description="Some person has the rights"),
        timdex.Rights(
            description="The person with the rights. Another person with the rights"
        ),
    ]

    assert MITAardvark.get_rights("gismit", record) == expected_rights


def test_aardvark_get_rights_external_restricted_success(aardvark_record_all_fields):
    """A "Restricted" record from the "gisogm" source gets "Not owned by MIT"."""
    record = next(aardvark_record_all_fields)
    record["dct_accessRights_s"] = "Restricted"

    # The "Access to files" entry is expected directly after the raw access
    # rights value and before the license/rights/rights-holder entries.
    expected_rights = [
        timdex.Rights(description="Restricted", kind="Access rights"),
        timdex.Rights(description="Not owned by MIT", kind="Access to files"),
        timdex.Rights(uri="http://license.license"),
        timdex.Rights(uri="http://another_license.another_license"),
        timdex.Rights(description="Some person has the rights"),
        timdex.Rights(
            description="The person with the rights. Another person with the rights"
        ),
    ]

    assert MITAardvark.get_rights("gisogm", record) == expected_rights


def test_aardvark_get_rights_external_public_success(aardvark_record_all_fields):
aardvark_record = next(aardvark_record_all_fields)
aardvark_record["dct_accessRights_s"] = "Public"
assert MITAardvark.get_rights("gisogm", aardvark_record) == [
timdex.Rights(description="Public", kind="Access rights"),
timdex.Rights(description="Not owned by MIT", kind="Access to files"),
timdex.Rights(uri="http://license.license"),
timdex.Rights(uri="http://another_license.another_license"),
timdex.Rights(description="Some person has the rights"),
Expand Down
32 changes: 26 additions & 6 deletions transmogrifier/sources/json/aardvark.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def get_optional_fields(self, source_record: dict) -> dict | None:
# related_items not used in MITAardvark

# rights
fields["rights"] = self.get_rights(source_record) or None
fields["rights"] = self.get_rights(self.source, source_record) or None

# subjects
fields["subjects"] = self.get_subjects(source_record) or None
Expand Down Expand Up @@ -385,16 +385,36 @@ def get_publication_information(source_record: dict) -> list[str]:
return publication_information

@staticmethod
def get_rights(source_record: dict) -> list[timdex.Rights]:
def get_rights(source: str, source_record: dict) -> list[timdex.Rights]:
"""Get values from source record for TIMDEX rights field."""
rights = []
kind_access_to_files = "Access to files"

if "dct_accessRights_s" in source_record:
rights.append(
timdex.Rights(
description=source_record["dct_accessRights_s"],
kind="Access rights",
)
)

if source == "gisogm":
rights.append(
timdex.Rights(
description=source_record["dct_accessRights_s"], kind="Access"
)
timdex.Rights(description="Not owned by MIT", kind=kind_access_to_files)
)
elif source == "gismit":
if source_record["dct_accessRights_s"] == "Restricted":
rights.append(
timdex.Rights(
description="MIT authentication",
kind=kind_access_to_files,
)
)
else:
rights.append(
timdex.Rights(
description="Free/open to all", kind=kind_access_to_files
)
)

rights.extend(
[
Expand Down

0 comments on commit c75fd04

Please sign in to comment.