serializers: ensure values are not None before access

inveniosoftware committed on Sep 18, 2024
1 parent 8476188
commit 4c3dd6b
Show file tree

Hide file tree

Showing 7 changed files with 106 additions and 53 deletions.
diff --git a/invenio_rdm_records/resources/serializers/csl/schema.py b/invenio_rdm_records/resources/serializers/csl/schema.py
@@ -13,6 +13,7 @@
 from flask_resources.serializers import BaseSerializerSchema
 from marshmallow import Schema, fields, missing, pre_dump
 from marshmallow_utils.fields import SanitizedUnicode, StrippedHTML
+from pydash import py_
 
 from ..schemas import CommonFieldsMixin
 from ..utils import get_vocabulary_props
@@ -62,19 +63,27 @@ class CSLJSONSchema(BaseSerializerSchema, CommonFieldsMixin):
 
     def get_type(self, obj):
         """Get resource type."""
+        resource_type_id = py_.get(obj, "metadata.resource_type.id")
+        if not resource_type_id:
+            return missing
+
         props = get_vocabulary_props(
             "resourcetypes",
             [
                 "props.csl",
             ],
-            obj["metadata"]["resource_type"]["id"],
+            resource_type_id,
         )
         return props.get("csl", "article")  # article is CSL "Other"
 
     def get_issued(self, obj):
         """Get issued dates."""
+        publication_date = py_.get(obj, "metadata.publication_date")
+        if not publication_date:
+            return missing
+
         try:
-            parsed = parse_edtf(obj["metadata"].get("publication_date"))
+            parsed = parse_edtf(publication_date)
         except EDTFParseException:
             return missing
 

diff --git a/invenio_rdm_records/resources/serializers/datacite/schema.py b/invenio_rdm_records/resources/serializers/datacite/schema.py
@@ -20,6 +20,7 @@
 from marshmallow import Schema, ValidationError, fields, missing, post_dump, validate
 from marshmallow_utils.fields import SanitizedUnicode
 from marshmallow_utils.html import strip_html
+from pydash import py_
 
 from ....proxies import current_rdm_records_service
 from ...serializers.ui.schema import current_default_locale
@@ -206,10 +207,14 @@ class DataCite43Schema(BaseSerializerSchema):
 
     def get_type(self, obj):
         """Get resource type."""
+        resource_type_id = py_.get(obj, "metadata.resource_type.id")
+        if not resource_type_id:
+            return missing
+
         props = get_vocabulary_props(
             "resourcetypes",
             ["props.datacite_general", "props.datacite_type"],
-            obj["metadata"]["resource_type"]["id"],
+            resource_type_id,
         )
         return {
             "resourceTypeGeneral": props.get("datacite_general", "Other"),
@@ -261,8 +266,11 @@ def get_descriptions(self, obj):
 
     def get_publication_year(self, obj):
         """Get publication year from edtf date."""
+        publication_date = py_.get(obj, "metadata.publication_date")
+        if not publication_date:
+            return missing
+
         try:
-            publication_date = obj["metadata"]["publication_date"]
             parsed_date = parse_edtf(publication_date)
             return str(parsed_date.lower_strict().tm_year)
         except ParseException:
@@ -274,7 +282,8 @@ def get_publication_year(self, obj):
 
     def get_dates(self, obj):
         """Get dates."""
-        dates = [{"date": obj["metadata"]["publication_date"], "dateType": "Issued"}]
+        pub_date = py_.get(obj, "metadata.publication_date")
+        dates = [{"date": pub_date, "dateType": "Issued"}] if pub_date else []
 
         updated = False
 
@@ -428,7 +437,7 @@ def get_related_identifiers(self, obj):
             if hasattr(obj, "parent"):
                 parent_record = obj.parent
             else:
-                parent_record = obj["parent"]
+                parent_record = obj.get("parent", {})
             parent_doi = parent_record.get("pids", {}).get("doi")
 
             if parent_doi:

diff --git a/invenio_rdm_records/resources/serializers/dublincore/schema.py b/invenio_rdm_records/resources/serializers/dublincore/schema.py
@@ -12,6 +12,7 @@
 from flask import current_app
 from flask_resources.serializers import BaseSerializerSchema
 from marshmallow import fields, missing
+from pydash import py_
 
 from ..schemas import CommonFieldsMixin
 from ..ui.schema import current_default_locale
@@ -91,22 +92,22 @@ def get_relations(self, obj):
         # FIXME: Add after UI support is there
 
         # Alternate identifiers
-        for a in obj["metadata"].get("alternate_identifiers", []):
+        for a in obj.get("metadata", {}).get("alternate_identifiers", []):
             rels.append(self._transform_identifier(a["identifier"], a["scheme"]))
 
         # Related identifiers
-        for a in obj["metadata"].get("related_identifiers", []):
+        for a in obj.get("metadata", {}).get("related_identifiers", []):
             rels.append(self._transform_identifier(a["identifier"], a["scheme"]))
 
         # Communities
-        communities = obj["parent"].get("communities", {}).get("entries", [])
+        communities = obj.get("parent", {}).get("communities", {}).get("entries", [])
         for community in communities:
             slug = community["slug"]
             url = f"{current_app.config['SITE_UI_URL']}/communities/{slug}"
             rels.append(self._transform_identifier(url, "url"))
 
         # Parent doi
-        parent_pids = obj["parent"].get("pids", {})
+        parent_pids = obj.get("parent", {}).get("pids", {})
         for key, value in parent_pids.items():
             if key == "doi":
                 rels.append(self._transform_identifier(value["identifier"], key))
@@ -117,13 +118,14 @@ def get_rights(self, obj):
         """Get rights."""
         rights = []
 
-        access_right = obj["access"]["status"]
-        if access_right == "metadata-only":
-            access_right = "closed"
+        access_right = py_.get(obj, "access.status")
+        if access_right:
+            if access_right == "metadata-only":
+                access_right = "closed"
 
-        rights.append(f"info:eu-repo/semantics/{access_right}Access")
+            rights.append(f"info:eu-repo/semantics/{access_right}Access")
 
-        for right in obj["metadata"].get("rights", []):
+        for right in obj.get("metadata", {}).get("rights", []):
             rights.append(right.get("title").get(current_default_locale()))
             if right.get("id"):
                 license_url = right.get("props", {}).get("url")
@@ -138,9 +140,14 @@ def get_rights(self, obj):
 
     def get_dates(self, obj):
         """Get dates."""
-        dates = [obj["metadata"]["publication_date"]]
+        dates = []
 
-        if obj["access"]["status"] == "embargoed":
+        publication_date = py_.get(obj, "metadata.publication_date")
+        if publication_date:
+            dates.append(publication_date)
+
+        access_right = py_.get(obj, "access.status")
+        if access_right == "embargoed":
             date = obj["access"]["embargo"]["until"]
             dates.append(f"info:eu-repo/date/embargoEnd/{date}")
 
@@ -181,12 +188,16 @@ def get_subjects(self, obj):
 
     def get_types(self, obj):
         """Get resource type."""
+        resource_type_id = py_.get(obj, "metadata.resource_type.id")
+        if not resource_type_id:
+            return missing
+
         props = get_vocabulary_props(
             "resourcetypes",
             [
                 "props.eurepo",
             ],
-            obj["metadata"]["resource_type"]["id"],
+            resource_type_id,
         )
         t = props.get("eurepo")
         return [t] if t else missing

diff --git a/invenio_rdm_records/resources/serializers/marcxml/schema.py b/invenio_rdm_records/resources/serializers/marcxml/schema.py
@@ -14,6 +14,7 @@
 from flask_resources.serializers import BaseSerializerSchema
 from marshmallow import fields, missing
 from marshmallow_utils.html import sanitize_unicode
+from pydash import py_
 
 from ..schemas import CommonFieldsMixin
 from ..ui.schema import current_default_locale
@@ -491,30 +492,33 @@ def get_types_and_communities(self, obj):
         if communities:
             slugs = [community.get("slug") for community in communities]
             output += [{"a": f"user-{slug}"} for slug in slugs]
-        props = get_vocabulary_props(
-            "resourcetypes",
-            [
-                "props.eurepo",
-                "props.marc21_type",
-                "props.marc21_subtype",
-            ],
-            obj["metadata"]["resource_type"]["id"],
-        )
-        props_eurepo = props.get("eurepo")
-        if props_eurepo:
-            eurepo = {"a": props_eurepo}
-            output.append(eurepo)
-
-        resource_types = {}
-
-        resource_type = props.get("marc21_type")
-        if resource_type:
-            resource_types["a"] = resource_type
-        resource_subtype = props.get("marc21_subtype")
-        if resource_subtype:
-            resource_types["b"] = resource_subtype
-
-        if resource_types:
-            output.append(resource_types)
+
+        resource_type_id = py_.get(obj, "metadata.resource_type.id")
+        if resource_type_id:
+            props = get_vocabulary_props(
+                "resourcetypes",
+                [
+                    "props.eurepo",
+                    "props.marc21_type",
+                    "props.marc21_subtype",
+                ],
+                resource_type_id,
+            )
+            props_eurepo = props.get("eurepo")
+            if props_eurepo:
+                eurepo = {"a": props_eurepo}
+                output.append(eurepo)
+
+            resource_types = {}
+
+            resource_type = props.get("marc21_type")
+            if resource_type:
+                resource_types["a"] = resource_type
+            resource_subtype = props.get("marc21_subtype")
+            if resource_subtype:
+                resource_types["b"] = resource_subtype
+
+            if resource_types:
+                output.append(resource_types)
 
         return output or missing
diff --git a/invenio_rdm_records/resources/serializers/schemaorg/schema.py b/invenio_rdm_records/resources/serializers/schemaorg/schema.py
@@ -207,10 +207,14 @@ def get_id(self, obj):
 
     def get_type(self, obj):
         """Get type. Use the vocabulary service to get the schema.org type."""
+        resource_type_id = py_.get(obj, "metadata.resource_type.id")
+        if not resource_type_id:
+            return missing
+
         props = get_vocabulary_props(
             "resourcetypes",
             ["props.schema.org"],
-            py_.get(obj, "metadata.resource_type.id"),
+            resource_type_id,
         )
         ret = props.get("schema.org", "https://schema.org/CreativeWork")
         return ret
@@ -230,8 +234,12 @@ def get_format(self, obj):
 
     def get_publication_date(self, obj):
         """Get publication date."""
+        publication_date = py_.get(obj, "metadata.publication_date")
+        if not publication_date:
+            return missing
+
         try:
-            parsed_date = parse_edtf(py_.get(obj, "metadata.publication_date"))
+            parsed_date = parse_edtf(publication_date)
         except ParseException:
             return missing
 

diff --git a/invenio_rdm_records/resources/serializers/schemas.py b/invenio_rdm_records/resources/serializers/schemas.py
@@ -8,6 +8,7 @@
 """Base parsing functions for the various serializers."""
 
 from marshmallow import missing
+from pydash import py_
 
 
 class CommonFieldsMixin:
@@ -55,7 +56,8 @@ def get_locations(self, obj):
 
     def get_titles(self, obj):
         """Get titles."""
-        return [obj["metadata"]["title"]]
+        title = py_.get(obj, "metadata.title")
+        return [title] if title else missing
 
     def get_identifiers(self, obj):
         """Get identifiers."""
@@ -67,7 +69,9 @@ def get_identifiers(self, obj):
 
     def get_creators(self, obj):
-        """Get creators."""
-        return [c["person_or_org"]["name"] for c in obj["metadata"].get("creators", [])]
+        """Get creator names, or ``missing`` when the record has none."""
+        return [
+            c["person_or_org"]["name"] for c in py_.get(obj, "metadata.creators", [])
+        ] or missing
 
     def get_publishers(self, obj):
         """Get publishers."""

diff --git a/invenio_rdm_records/resources/serializers/ui/schema.py b/invenio_rdm_records/resources/serializers/ui/schema.py
@@ -14,6 +14,7 @@
 from functools import partial
 
 from babel_edtf import parse_edtf
+from edtf.parser.grammar import ParseException
 from flask import current_app, g
 from flask_resources import BaseObjectSchema
 from invenio_communities.communities.resources.ui_schema import (
@@ -30,6 +31,7 @@
 from marshmallow_utils.fields import FormatEDTF as FormatEDTF_
 from marshmallow_utils.fields import SanitizedHTML, SanitizedUnicode, StrippedHTML
 from marshmallow_utils.fields.babel import gettext_from_dict
+from pyparsing import ParseException
 
 from .fields import AccessStatusField
 
@@ -218,12 +220,18 @@ def _format_journal(journal, publication_date):
         journal_issue = journal.get("issue")
         journal_volume = journal.get("volume")
         journal_pages = journal.get("pages")
-        publication_date_edtf = (
-            parse_edtf(publication_date).lower_strict() if publication_date else None
-        )
-        publication_date_formatted = (
-            f"{publication_date_edtf.tm_year}" if publication_date_edtf else None
-        )
+
+        try:
+            publication_date_edtf = (
+                parse_edtf(publication_date).lower_strict()
+                if publication_date
+                else None
+            )
+            publication_date_formatted = (
+                f"{publication_date_edtf.tm_year}" if publication_date_edtf else None
+            )
+        except ParseException:
+            publication_date_formatted = None
 
         title = f"{journal_title}" if journal_title else None
         vol_issue = f"{journal_volume}" if journal_volume else None