diff --git a/invenio_rdm_records/resources/serializers/csl/schema.py b/invenio_rdm_records/resources/serializers/csl/schema.py index a0cce3056..e6d0f24ca 100644 --- a/invenio_rdm_records/resources/serializers/csl/schema.py +++ b/invenio_rdm_records/resources/serializers/csl/schema.py @@ -13,6 +13,7 @@ from flask_resources.serializers import BaseSerializerSchema from marshmallow import Schema, fields, missing, pre_dump from marshmallow_utils.fields import SanitizedUnicode, StrippedHTML +from pydash import py_ from ..schemas import CommonFieldsMixin from ..utils import get_vocabulary_props @@ -62,19 +63,27 @@ class CSLJSONSchema(BaseSerializerSchema, CommonFieldsMixin): def get_type(self, obj): """Get resource type.""" + resource_type_id = py_.get(obj, "metadata.resource_type.id") + if not resource_type_id: + return missing + props = get_vocabulary_props( "resourcetypes", [ "props.csl", ], - obj["metadata"]["resource_type"]["id"], + resource_type_id, ) return props.get("csl", "article") # article is CSL "Other" def get_issued(self, obj): """Get issued dates.""" + publication_date = py_.get(obj, "metadata.publication_date") + if not publication_date: + return missing + try: - parsed = parse_edtf(obj["metadata"].get("publication_date")) + parsed = parse_edtf(publication_date) except EDTFParseException: return missing diff --git a/invenio_rdm_records/resources/serializers/datacite/schema.py b/invenio_rdm_records/resources/serializers/datacite/schema.py index 17ed58d33..82d685e41 100644 --- a/invenio_rdm_records/resources/serializers/datacite/schema.py +++ b/invenio_rdm_records/resources/serializers/datacite/schema.py @@ -20,6 +20,7 @@ from marshmallow import Schema, ValidationError, fields, missing, post_dump, validate from marshmallow_utils.fields import SanitizedUnicode from marshmallow_utils.html import strip_html +from pydash import py_ from ....proxies import current_rdm_records_service from ...serializers.ui.schema import current_default_locale @@ -206,10 +207,14 @@ class DataCite43Schema(BaseSerializerSchema): def get_type(self, obj): """Get resource type.""" + resource_type_id = py_.get(obj, "metadata.resource_type.id") + if not resource_type_id: + return missing + props = get_vocabulary_props( "resourcetypes", ["props.datacite_general", "props.datacite_type"], - obj["metadata"]["resource_type"]["id"], + resource_type_id, ) return { "resourceTypeGeneral": props.get("datacite_general", "Other"), @@ -261,8 +266,11 @@ def get_descriptions(self, obj): def get_publication_year(self, obj): """Get publication year from edtf date.""" + publication_date = py_.get(obj, "metadata.publication_date") + if not publication_date: + return missing + try: - publication_date = obj["metadata"]["publication_date"] parsed_date = parse_edtf(publication_date) return str(parsed_date.lower_strict().tm_year) except ParseException: @@ -274,7 +282,8 @@ def get_publication_year(self, obj): def get_dates(self, obj): """Get dates.""" - dates = [{"date": obj["metadata"]["publication_date"], "dateType": "Issued"}] + pub_date = py_.get(obj, "metadata.publication_date") + dates = [{"date": pub_date, "dateType": "Issued"}] if pub_date else [] updated = False @@ -428,7 +437,7 @@ def get_related_identifiers(self, obj): if hasattr(obj, "parent"): parent_record = obj.parent else: - parent_record = obj["parent"] + parent_record = obj.get("parent", {}) parent_doi = parent_record.get("pids", {}).get("doi") if parent_doi: diff --git a/invenio_rdm_records/resources/serializers/dublincore/schema.py b/invenio_rdm_records/resources/serializers/dublincore/schema.py index 48cbc2745..541b762a7 100644 --- a/invenio_rdm_records/resources/serializers/dublincore/schema.py +++ b/invenio_rdm_records/resources/serializers/dublincore/schema.py @@ -12,6 +12,7 @@ from flask import current_app from flask_resources.serializers import BaseSerializerSchema from marshmallow import fields, missing +from pydash import py_ from ..schemas import CommonFieldsMixin from ..ui.schema import current_default_locale @@ -91,22 +92,22 @@ def get_relations(self, obj): # FIXME: Add after UI support is there # Alternate identifiers - for a in obj["metadata"].get("alternate_identifiers", []): + for a in obj.get("metadata", {}).get("alternate_identifiers", []): rels.append(self._transform_identifier(a["identifier"], a["scheme"])) # Related identifiers - for a in obj["metadata"].get("related_identifiers", []): + for a in obj.get("metadata", {}).get("related_identifiers", []): rels.append(self._transform_identifier(a["identifier"], a["scheme"])) # Communities - communities = obj["parent"].get("communities", {}).get("entries", []) + communities = obj.get("parent", {}).get("communities", {}).get("entries", []) for community in communities: slug = community["slug"] url = f"{current_app.config['SITE_UI_URL']}/communities/{slug}" rels.append(self._transform_identifier(url, "url")) # Parent doi - parent_pids = obj["parent"].get("pids", {}) + parent_pids = obj.get("parent", {}).get("pids", {}) for key, value in parent_pids.items(): if key == "doi": rels.append(self._transform_identifier(value["identifier"], key)) @@ -117,13 +118,14 @@ def get_rights(self, obj): """Get rights.""" rights = [] - access_right = obj["access"]["status"] - if access_right == "metadata-only": - access_right = "closed" + access_right = py_.get(obj, "access.status") + if access_right: + if access_right == "metadata-only": + access_right = "closed" - rights.append(f"info:eu-repo/semantics/{access_right}Access") + rights.append(f"info:eu-repo/semantics/{access_right}Access") - for right in obj["metadata"].get("rights", []): + for right in obj.get("metadata", {}).get("rights", []): rights.append(right.get("title").get(current_default_locale())) if right.get("id"): license_url = right.get("props", {}).get("url") @@ -138,9 +140,14 @@ def get_rights(self, obj): def get_dates(self, obj): """Get dates.""" - dates = [obj["metadata"]["publication_date"]] + dates = [] - if obj["access"]["status"] == "embargoed": + publication_date = py_.get(obj, "metadata.publication_date") + if publication_date: + dates.append(publication_date) + + access_right = py_.get(obj, "access.status") + if access_right == "embargoed": date = obj["access"]["embargo"]["until"] dates.append(f"info:eu-repo/date/embargoEnd/{date}") @@ -181,12 +188,16 @@ def get_subjects(self, obj): def get_types(self, obj): """Get resource type.""" + resource_type_id = py_.get(obj, "metadata.resource_type.id") + if not resource_type_id: + return missing + props = get_vocabulary_props( "resourcetypes", [ "props.eurepo", ], - obj["metadata"]["resource_type"]["id"], + resource_type_id, ) t = props.get("eurepo") return [t] if t else missing diff --git a/invenio_rdm_records/resources/serializers/marcxml/schema.py b/invenio_rdm_records/resources/serializers/marcxml/schema.py index f2e70cebe..4af87ea17 100644 --- a/invenio_rdm_records/resources/serializers/marcxml/schema.py +++ b/invenio_rdm_records/resources/serializers/marcxml/schema.py @@ -14,6 +14,7 @@ from flask_resources.serializers import BaseSerializerSchema from marshmallow import fields, missing from marshmallow_utils.html import sanitize_unicode +from pydash import py_ from ..schemas import CommonFieldsMixin from ..ui.schema import current_default_locale @@ -491,30 +492,33 @@ def get_types_and_communities(self, obj): if communities: slugs = [community.get("slug") for community in communities] output += [{"a": f"user-{slug}"} for slug in slugs] - props = get_vocabulary_props( - "resourcetypes", - [ - "props.eurepo", - "props.marc21_type", - "props.marc21_subtype", - ], - obj["metadata"]["resource_type"]["id"], - ) - props_eurepo = props.get("eurepo") - if props_eurepo: - eurepo = {"a": props_eurepo} - output.append(eurepo) - - resource_types = {} - - resource_type = props.get("marc21_type") - if resource_type: - resource_types["a"] = resource_type - resource_subtype = props.get("marc21_subtype") - if resource_subtype: - resource_types["b"] = resource_subtype - - if resource_types: - output.append(resource_types) + + resource_type_id = py_.get(obj, "metadata.resource_type.id") + if resource_type_id: + props = get_vocabulary_props( + "resourcetypes", + [ + "props.eurepo", + "props.marc21_type", + "props.marc21_subtype", + ], + resource_type_id, + ) + props_eurepo = props.get("eurepo") + if props_eurepo: + eurepo = {"a": props_eurepo} + output.append(eurepo) + + resource_types = {} + + resource_type = props.get("marc21_type") + if resource_type: + resource_types["a"] = resource_type + resource_subtype = props.get("marc21_subtype") + if resource_subtype: + resource_types["b"] = resource_subtype + + if resource_types: + output.append(resource_types) return output or missing diff --git a/invenio_rdm_records/resources/serializers/schemaorg/schema.py b/invenio_rdm_records/resources/serializers/schemaorg/schema.py index 8df910e76..fff84061d 100644 --- a/invenio_rdm_records/resources/serializers/schemaorg/schema.py +++ b/invenio_rdm_records/resources/serializers/schemaorg/schema.py @@ -207,10 +207,14 @@ def get_id(self, obj): def get_type(self, obj): """Get type. Use the vocabulary service to get the schema.org type.""" + resource_type_id = py_.get(obj, "metadata.resource_type.id") + if not resource_type_id: + return missing + props = get_vocabulary_props( "resourcetypes", ["props.schema.org"], - py_.get(obj, "metadata.resource_type.id"), + resource_type_id, ) ret = props.get("schema.org", "https://schema.org/CreativeWork") return ret @@ -230,8 +234,12 @@ def get_format(self, obj): def get_publication_date(self, obj): """Get publication date.""" + publication_date = py_.get(obj, "metadata.publication_date") + if not publication_date: + return missing + try: - parsed_date = parse_edtf(py_.get(obj, "metadata.publication_date")) + parsed_date = parse_edtf(publication_date) except ParseException: return missing diff --git a/invenio_rdm_records/resources/serializers/schemas.py b/invenio_rdm_records/resources/serializers/schemas.py index 6d6b661b4..0a7034d56 100644 --- a/invenio_rdm_records/resources/serializers/schemas.py +++ b/invenio_rdm_records/resources/serializers/schemas.py @@ -8,6 +8,7 @@ """Base parsing functions for the various serializers.""" from marshmallow import missing +from pydash import py_ class CommonFieldsMixin: @@ -55,7 +56,8 @@ def get_locations(self, obj): def get_titles(self, obj): """Get titles.""" - return [obj["metadata"]["title"]] + title = py_.get(obj, "metadata.title") + return [title] if title else missing def get_identifiers(self, obj): """Get identifiers.""" @@ -67,7 +69,9 @@ def get_identifiers(self, obj): def get_creators(self, obj): """Get creators.""" - return [c["person_or_org"]["name"] for c in obj["metadata"].get("creators", [])] + return [ + c["person_or_org"]["name"] for c in obj["metadata"].get("creators", []) + ] or missing def get_publishers(self, obj): """Get publishers.""" diff --git a/invenio_rdm_records/resources/serializers/ui/schema.py b/invenio_rdm_records/resources/serializers/ui/schema.py index 8f6fa6557..05395550a 100644 --- a/invenio_rdm_records/resources/serializers/ui/schema.py +++ b/invenio_rdm_records/resources/serializers/ui/schema.py @@ -14,6 +14,7 @@ from functools import partial from babel_edtf import parse_edtf +from edtf.parser.grammar import ParseException from flask import current_app, g from flask_resources import BaseObjectSchema from invenio_communities.communities.resources.ui_schema import ( @@ -30,6 +31,7 @@ from marshmallow_utils.fields import FormatEDTF as FormatEDTF_ from marshmallow_utils.fields import SanitizedHTML, SanitizedUnicode, StrippedHTML from marshmallow_utils.fields.babel import gettext_from_dict +from pyparsing import ParseException from .fields import AccessStatusField @@ -218,12 +220,18 @@ def _format_journal(journal, publication_date): journal_issue = journal.get("issue") journal_volume = journal.get("volume") journal_pages = journal.get("pages") - publication_date_edtf = ( - parse_edtf(publication_date).lower_strict() if publication_date else None - ) - publication_date_formatted = ( - f"{publication_date_edtf.tm_year}" if publication_date_edtf else None - ) + + try: + publication_date_edtf = ( + parse_edtf(publication_date).lower_strict() + if publication_date + else None + ) + publication_date_formatted = ( + f"{publication_date_edtf.tm_year}" if publication_date_edtf else None + ) + except ParseException: + publication_date_formatted = None title = f"{journal_title}" if journal_title else None vol_issue = f"{journal_volume}" if journal_volume else None