Skip to content

Commit

Permalink
serializers: ensure values are not None before access
Browse files: browse the repository at this point in the history
  • Loading branch information
rekt-hard authored and max-moser committed Sep 18, 2024
1 parent 8476188 commit 4c3dd6b
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 53 deletions.
13 changes: 11 additions & 2 deletions invenio_rdm_records/resources/serializers/csl/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from flask_resources.serializers import BaseSerializerSchema
from marshmallow import Schema, fields, missing, pre_dump
from marshmallow_utils.fields import SanitizedUnicode, StrippedHTML
from pydash import py_

from ..schemas import CommonFieldsMixin
from ..utils import get_vocabulary_props
Expand Down Expand Up @@ -62,19 +63,27 @@ class CSLJSONSchema(BaseSerializerSchema, CommonFieldsMixin):

def get_type(self, obj):
"""Get the CSL type for the record's resource type.

Returns ``missing`` when ``metadata.resource_type.id`` is absent or empty.
Otherwise reads the ``props.csl`` property of the matching
``resourcetypes`` vocabulary entry and falls back to ``"article"``.
"""
resource_type_id = py_.get(obj, "metadata.resource_type.id")
# Guard: skip the vocabulary lookup entirely when there is no id.
if not resource_type_id:
return missing

props = get_vocabulary_props(
"resourcetypes",
[
"props.csl",
],
# NOTE(review): the next two lines look like the pre-change and
# post-change form of the same argument as rendered by the diff view
# (the +/- markers are not visible here) - confirm against the commit.
obj["metadata"]["resource_type"]["id"],
resource_type_id,
)
return props.get("csl", "article") # article is CSL "Other"

def get_issued(self, obj):
"""Get issued dates."""
publication_date = py_.get(obj, "metadata.publication_date")
if not publication_date:
return missing

try:
parsed = parse_edtf(obj["metadata"].get("publication_date"))
parsed = parse_edtf(publication_date)
except EDTFParseException:
return missing

Expand Down
17 changes: 13 additions & 4 deletions invenio_rdm_records/resources/serializers/datacite/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from marshmallow import Schema, ValidationError, fields, missing, post_dump, validate
from marshmallow_utils.fields import SanitizedUnicode
from marshmallow_utils.html import strip_html
from pydash import py_

from ....proxies import current_rdm_records_service
from ...serializers.ui.schema import current_default_locale
Expand Down Expand Up @@ -206,10 +207,14 @@ class DataCite43Schema(BaseSerializerSchema):

def get_type(self, obj):
"""Get resource type."""
resource_type_id = py_.get(obj, "metadata.resource_type.id")
if not resource_type_id:
return missing

props = get_vocabulary_props(
"resourcetypes",
["props.datacite_general", "props.datacite_type"],
obj["metadata"]["resource_type"]["id"],
resource_type_id,
)
return {
"resourceTypeGeneral": props.get("datacite_general", "Other"),
Expand Down Expand Up @@ -261,8 +266,11 @@ def get_descriptions(self, obj):

def get_publication_year(self, obj):
"""Get publication year from edtf date."""
publication_date = py_.get(obj, "metadata.publication_date")
if not publication_date:
return missing

try:
publication_date = obj["metadata"]["publication_date"]
parsed_date = parse_edtf(publication_date)
return str(parsed_date.lower_strict().tm_year)
except ParseException:
Expand All @@ -274,7 +282,8 @@ def get_publication_year(self, obj):

def get_dates(self, obj):
"""Get dates."""
dates = [{"date": obj["metadata"]["publication_date"], "dateType": "Issued"}]
pub_date = py_.get(obj, "metadata.publication_date")
dates = [{"date": pub_date, "dateType": "Issued"}] if pub_date else []

updated = False

Expand Down Expand Up @@ -428,7 +437,7 @@ def get_related_identifiers(self, obj):
if hasattr(obj, "parent"):
parent_record = obj.parent
else:
parent_record = obj["parent"]
parent_record = obj.get("parent", {})
parent_doi = parent_record.get("pids", {}).get("doi")

if parent_doi:
Expand Down
35 changes: 23 additions & 12 deletions invenio_rdm_records/resources/serializers/dublincore/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from flask import current_app
from flask_resources.serializers import BaseSerializerSchema
from marshmallow import fields, missing
from pydash import py_

from ..schemas import CommonFieldsMixin
from ..ui.schema import current_default_locale
Expand Down Expand Up @@ -91,22 +92,22 @@ def get_relations(self, obj):
# FIXME: Add after UI support is there

# Alternate identifiers
for a in obj["metadata"].get("alternate_identifiers", []):
for a in obj.get("metadata", {}).get("alternate_identifiers", []):
rels.append(self._transform_identifier(a["identifier"], a["scheme"]))

# Related identifiers
for a in obj["metadata"].get("related_identifiers", []):
for a in obj.get("metadata", {}).get("related_identifiers", []):
rels.append(self._transform_identifier(a["identifier"], a["scheme"]))

# Communities
communities = obj["parent"].get("communities", {}).get("entries", [])
communities = obj.get("parent", {}).get("communities", {}).get("entries", [])
for community in communities:
slug = community["slug"]
url = f"{current_app.config['SITE_UI_URL']}/communities/{slug}"
rels.append(self._transform_identifier(url, "url"))

# Parent doi
parent_pids = obj["parent"].get("pids", {})
parent_pids = obj.get("parent", {}).get("pids", {})
for key, value in parent_pids.items():
if key == "doi":
rels.append(self._transform_identifier(value["identifier"], key))
Expand All @@ -117,13 +118,14 @@ def get_rights(self, obj):
"""Get rights."""
rights = []

access_right = obj["access"]["status"]
if access_right == "metadata-only":
access_right = "closed"
access_right = py_.get(obj, "access.status")
if access_right:
if access_right == "metadata-only":
access_right = "closed"

rights.append(f"info:eu-repo/semantics/{access_right}Access")
rights.append(f"info:eu-repo/semantics/{access_right}Access")

for right in obj["metadata"].get("rights", []):
for right in obj.get("metadata", {}).get("rights", []):
rights.append(right.get("title").get(current_default_locale()))
if right.get("id"):
license_url = right.get("props", {}).get("url")
Expand All @@ -138,9 +140,14 @@ def get_rights(self, obj):

def get_dates(self, obj):
"""Get dates."""
dates = [obj["metadata"]["publication_date"]]
dates = []

if obj["access"]["status"] == "embargoed":
publication_date = py_.get(obj, "metadata.publication_date")
if publication_date:
dates.append(publication_date)

access_right = py_.get(obj, "access.status")
if access_right == "embargoed":
date = obj["access"]["embargo"]["until"]
dates.append(f"info:eu-repo/date/embargoEnd/{date}")

Expand Down Expand Up @@ -181,12 +188,16 @@ def get_subjects(self, obj):

def get_types(self, obj):
"""Get the resource type as a one-element list.

Returns ``missing`` when ``metadata.resource_type.id`` is absent or empty,
or when the ``resourcetypes`` vocabulary entry has no ``eurepo`` property;
otherwise returns ``[<eurepo value>]``.
"""
resource_type_id = py_.get(obj, "metadata.resource_type.id")
# Guard: skip the vocabulary lookup entirely when there is no id.
if not resource_type_id:
return missing

props = get_vocabulary_props(
"resourcetypes",
[
"props.eurepo",
],
# NOTE(review): the next two lines look like the pre-change and
# post-change form of the same argument as rendered by the diff view
# (the +/- markers are not visible here) - confirm against the commit.
obj["metadata"]["resource_type"]["id"],
resource_type_id,
)
t = props.get("eurepo")
return [t] if t else missing
Expand Down
54 changes: 29 additions & 25 deletions invenio_rdm_records/resources/serializers/marcxml/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from flask_resources.serializers import BaseSerializerSchema
from marshmallow import fields, missing
from marshmallow_utils.html import sanitize_unicode
from pydash import py_

from ..schemas import CommonFieldsMixin
from ..ui.schema import current_default_locale
Expand Down Expand Up @@ -491,30 +492,33 @@ def get_types_and_communities(self, obj):
if communities:
slugs = [community.get("slug") for community in communities]
output += [{"a": f"user-{slug}"} for slug in slugs]
props = get_vocabulary_props(
"resourcetypes",
[
"props.eurepo",
"props.marc21_type",
"props.marc21_subtype",
],
obj["metadata"]["resource_type"]["id"],
)
props_eurepo = props.get("eurepo")
if props_eurepo:
eurepo = {"a": props_eurepo}
output.append(eurepo)

resource_types = {}

resource_type = props.get("marc21_type")
if resource_type:
resource_types["a"] = resource_type
resource_subtype = props.get("marc21_subtype")
if resource_subtype:
resource_types["b"] = resource_subtype

if resource_types:
output.append(resource_types)

resource_type_id = py_.get(obj, "metadata.resource_type.id")
if resource_type_id:
props = get_vocabulary_props(
"resourcetypes",
[
"props.eurepo",
"props.marc21_type",
"props.marc21_subtype",
],
resource_type_id,
)
props_eurepo = props.get("eurepo")
if props_eurepo:
eurepo = {"a": props_eurepo}
output.append(eurepo)

resource_types = {}

resource_type = props.get("marc21_type")
if resource_type:
resource_types["a"] = resource_type
resource_subtype = props.get("marc21_subtype")
if resource_subtype:
resource_types["b"] = resource_subtype

if resource_types:
output.append(resource_types)

return output or missing
12 changes: 10 additions & 2 deletions invenio_rdm_records/resources/serializers/schemaorg/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,14 @@ def get_id(self, obj):

def get_type(self, obj):
"""Get type. Use the vocabulary service to get the schema.org type.

Returns ``missing`` when ``metadata.resource_type.id`` is absent or empty.
Otherwise returns the ``schema.org`` property of the matching
``resourcetypes`` vocabulary entry, defaulting to
``"https://schema.org/CreativeWork"``.
"""
resource_type_id = py_.get(obj, "metadata.resource_type.id")
# Guard: skip the vocabulary lookup entirely when there is no id.
if not resource_type_id:
return missing

props = get_vocabulary_props(
"resourcetypes",
["props.schema.org"],
# NOTE(review): the next two lines look like the pre-change and
# post-change form of the same argument as rendered by the diff view
# (the +/- markers are not visible here) - confirm against the commit.
py_.get(obj, "metadata.resource_type.id"),
resource_type_id,
)
ret = props.get("schema.org", "https://schema.org/CreativeWork")
return ret
Expand All @@ -230,8 +234,12 @@ def get_format(self, obj):

def get_publication_date(self, obj):
"""Get publication date."""
publication_date = py_.get(obj, "metadata.publication_date")
if not publication_date:
return missing

try:
parsed_date = parse_edtf(py_.get(obj, "metadata.publication_date"))
parsed_date = parse_edtf(publication_date)
except ParseException:
return missing

Expand Down
8 changes: 6 additions & 2 deletions invenio_rdm_records/resources/serializers/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""Base parsing functions for the various serializers."""

from marshmallow import missing
from pydash import py_


class CommonFieldsMixin:
Expand Down Expand Up @@ -55,7 +56,8 @@ def get_locations(self, obj):

def get_titles(self, obj):
"""Get titles.

Returns a one-element list holding ``metadata.title``, or ``missing`` when
the title is absent or empty.
"""
# NOTE(review): the direct-index return below and the py_.get-based lines
# after it look like the pre-change and post-change bodies as rendered by
# the diff view (the +/- markers are not visible here) - confirm against
# the commit.
return [obj["metadata"]["title"]]
title = py_.get(obj, "metadata.title")
return [title] if title else missing

def get_identifiers(self, obj):
"""Get identifiers."""
Expand All @@ -67,7 +69,9 @@ def get_identifiers(self, obj):

def get_creators(self, obj):
"""Get creators.

Returns the ``person_or_org.name`` of every entry in ``metadata.creators``,
or ``missing`` when that list is empty or absent.

NOTE(review): both forms below still index ``obj["metadata"]`` directly, so
a record without a ``metadata`` key would raise ``KeyError`` here, unlike
the ``py_.get``-based accessors in this commit. Each creator entry is also
indexed directly for ``person_or_org`` and ``name``. Confirm whether this
is intended.
"""
# NOTE(review): the single-line return and the multi-line return after it
# look like the pre-change and post-change bodies as rendered by the diff
# view (the +/- markers are not visible here) - confirm against the commit.
return [c["person_or_org"]["name"] for c in obj["metadata"].get("creators", [])]
return [
c["person_or_org"]["name"] for c in obj["metadata"].get("creators", [])
] or missing

def get_publishers(self, obj):
"""Get publishers."""
Expand Down
20 changes: 14 additions & 6 deletions invenio_rdm_records/resources/serializers/ui/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from functools import partial

from babel_edtf import parse_edtf
from edtf.parser.grammar import ParseException
from flask import current_app, g
from flask_resources import BaseObjectSchema
from invenio_communities.communities.resources.ui_schema import (
Expand All @@ -30,6 +31,7 @@
from marshmallow_utils.fields import FormatEDTF as FormatEDTF_
from marshmallow_utils.fields import SanitizedHTML, SanitizedUnicode, StrippedHTML
from marshmallow_utils.fields.babel import gettext_from_dict
from pyparsing import ParseException

from .fields import AccessStatusField

Expand Down Expand Up @@ -218,12 +220,18 @@ def _format_journal(journal, publication_date):
journal_issue = journal.get("issue")
journal_volume = journal.get("volume")
journal_pages = journal.get("pages")
publication_date_edtf = (
parse_edtf(publication_date).lower_strict() if publication_date else None
)
publication_date_formatted = (
f"{publication_date_edtf.tm_year}" if publication_date_edtf else None
)

try:
publication_date_edtf = (
parse_edtf(publication_date).lower_strict()
if publication_date
else None
)
publication_date_formatted = (
f"{publication_date_edtf.tm_year}" if publication_date_edtf else None
)
except ParseException:
publication_date_formatted = None

title = f"{journal_title}" if journal_title else None
vol_issue = f"{journal_volume}" if journal_volume else None
Expand Down

0 comments on commit 4c3dd6b

Please sign in to comment.