diff --git a/django/library/metadata.py b/django/library/metadata.py
index 1e78a51b2..8b74dd243 100644
--- a/django/library/metadata.py
+++ b/django/library/metadata.py
@@ -5,7 +5,6 @@
     Person,
     Organization,
     Role,
-    CreativeWork,
 )
 from codemeticulous.cff.models import CitationFileFormat
 from codemeticulous.datacite.models import DataCite
@@ -46,73 +45,142 @@ def convert_affiliation(cls, affiliation: dict) -> Organization:
         )
 
     @classmethod
-    def convert_release_contributors(
+    def _convert_actor(
+        cls, contributor, actor_type: Literal["author", "contributor"], index: int
+    ) -> Person | Organization:
+        # https://www.w3.org/TR/json-ld11/#identifying-blank-nodes
+        actor_id = contributor.orcid_url or f"_:{actor_type}_{index + 1}"
+        if contributor.is_organization:
+            return Organization(
+                id_=actor_id,
+                name=contributor.name,
+            )
+        elif contributor.is_person:
+            affiliation = (
+                cls.convert_affiliation(contributor.primary_affiliation)
+                if contributor.primary_affiliation
+                else None
+            )
+            return Person(
+                id_=actor_id,
+                givenName=contributor.get_given_name() or None,
+                familyName=contributor.get_family_name() or None,
+                affiliation=affiliation,
+                email=contributor.email or None,
+            )
+        else:
+            raise ValueError(f"{contributor} is neither a person nor an organization")
+
+    @classmethod
+    def _convert_roles(cls, actor_id: str, roles: list[str]) -> list[Role]:
+        return [
+            Role(
+                id_=actor_id,
+                roleName=role,
+            )
+            for role in roles
+            if role != "author"
+        ]
+
+    @classmethod
+    def convert_contributors(
         cls,
-        release_contributors,
+        contributors,
         actor_type: Literal["author", "contributor"],
     ) -> list[Person | Organization | Role]:
+        """converts a list of Contributor or ReleaseContributor objects to a list of codemeta actors.
+        If ReleaseContributors are given, the roles are also converted to codemeta/schema.org roles
+        """
         codemeta_actors = []
         codemeta_roles = []
-        for num, rc in enumerate(release_contributors):
-            contributor = rc.contributor
-            # https://www.w3.org/TR/json-ld11/#identifying-blank-nodes
-            contributor_id = contributor.orcid_url or f"_:{actor_type}_{num + 1}"
-            if contributor.is_organization:
-                codemeta_actors.append(
-                    Organization(
-                        id_=contributor_id,
-                        name=contributor.name,
-                    )
-                )
-            elif contributor.is_person:
-                affiliation = (
-                    cls.convert_affiliation(contributor.primary_affiliation)
-                    if contributor.primary_affiliation
-                    else None
-                )
-                codemeta_actors.append(
-                    Person(
-                        id_=contributor_id,
-                        givenName=contributor.get_given_name() or None,
-                        familyName=contributor.get_family_name() or None,
-                        affiliation=affiliation,
-                        email=contributor.email or None,
-                    )
+        for index, contributor_or_rc in enumerate(contributors):
+            contributor = (
+                contributor_or_rc.contributor
+                if hasattr(contributor_or_rc, "contributor")
+                else contributor_or_rc
+            )
+            actor = cls._convert_actor(contributor, actor_type, index)
+            codemeta_actors.append(actor)
+            if hasattr(contributor_or_rc, "roles"):
+                codemeta_roles.extend(
+                    cls._convert_roles(actor.id_, contributor_or_rc.roles)
                 )
-
-            for role in rc.roles:
-                if role != "author":
-                    codemeta_roles.append(
-                        Role(
-                            id_=contributor_id,
-                            roleName=role,
-                        )
-                    )
-
         return codemeta_actors + codemeta_roles
 
     @classmethod
-    def to_textual_creative_work(cls, text: str) -> CreativeWork:
+    def to_textual_creative_work(cls, text: str) -> dict:
         return {
             "@type": "CreativeWork",
             "text": text,
         }
 
+    @classmethod
+    def license_to_creative_work(cls, license) -> dict:
+        return {
+            "@type": "CreativeWork",
+            "name": license.name,
+            "url": license.url,
+        }
+
+    @classmethod
+    def _common_codebase_fields(cls, codebase) -> dict:
+        return dict(
+            type_="SoftwareSourceCode",
+            name=codebase.title,
+            codeRepository=codebase.repository_url or None,
+            applicationCategory="Computational Model",
+            citation=[
+                cls.to_textual_creative_work(text)
+                for text in [
+                    codebase.references_text,
+                    codebase.replication_text,
+                ]
+                if text
+            ]
+            or None,
+            # tags are sorted so that comparisons are deterministic
+            keywords=[tag.name for tag in codebase.tags.all().order_by("name")] or None,
+            publisher=cls.COMSES_ORGANIZATION,
+            description=codebase.description.raw,
+            referencePublication=codebase.associated_publication_text or None,
+        )
+
+    @classmethod
+    def _convert_codebase(cls, codebase) -> CodeMeta:
+        return CodeMeta(
+            **cls._common_codebase_fields(codebase),
+            id_=codebase.permanent_url,
+            identifier=(
+                [codebase.doi, codebase.permanent_url]
+                if codebase.doi
+                else codebase.permanent_url
+            ),
+            author=cls.convert_contributors(codebase.all_author_contributors, "author")
+            or None,
+            contributor=cls.convert_contributors(
+                codebase.all_nonauthor_contributors, "contributor"
+            )
+            or None,
+            dateCreated=codebase.date_created.date(),
+            datePublished=(
+                codebase.last_published_on.date()
+                if codebase.last_published_on
+                else None
+            ),
+            url=codebase.permanent_url,
+        )
+
     @classmethod
     def _convert_release(cls, release) -> CodeMeta:
         codebase = release.codebase
         return CodeMeta(
-            type_="SoftwareSourceCode",
+            **cls._common_codebase_fields(codebase),
             id_=release.permanent_url,
             identifier=(
                 [release.doi, release.permanent_url]
                 if release.doi
                 else release.permanent_url
             ),
-            name=codebase.title,
-            # FIXME: is this semantically correct?
-            # isPartOf=COMSES_MODEL_LIBRARY_CREATIVE_WORK,
-            codeRepository=codebase.repository_url or None,
             programmingLanguage=[
                 # FIXME: this can include "version" when langs are refactored
                 {"@type": "ComputerLanguage", "name": pl.name}
@@ -125,53 +193,40 @@ def _convert_release(cls, release) -> CodeMeta:
             # FIXME: anything to use this for? it can be either the target os or target
             # framework (e.g. Mesa, NetLogo) but these are both already covered
             # targetProduct=release.os,
-            applicationCategory="Computational Model",
-            # applicationSubCategory="Agent-Based Model", <-- would be nice
             downloadUrl=f"{settings.BASE_URL}{release.get_download_url()}",
             operatingSystem=release.os,
             releaseNotes=release.release_notes.raw,
             supportingData=release.output_data_url or None,
-            author=cls.convert_release_contributors(
+            author=cls.convert_contributors(
                 release.author_release_contributors, "author"
             )
             or None,
-            citation=[
-                cls.to_textual_creative_work(text)
-                for text in [
-                    codebase.references_text,
-                    codebase.replication_text,
-                ]
-                if text
-            ]
-            or None,
-            contributor=cls.convert_release_contributors(
+            contributor=cls.convert_contributors(
                 release.nonauthor_release_contributors, "contributor"
             )
             or None,
             copyrightYear=(
                 release.last_published_on.year if release.last_published_on else None
             ),
-            dateCreated=codebase.date_created.date(),
+            dateCreated=release.date_created.date(),
             dateModified=(
                 release.last_modified.date() if release.last_modified else None
             ),
             datePublished=(
                 release.last_published_on.date() if release.last_published_on else None
             ),
-            # tags are sorted so that comparisons are deterministic
-            keywords=[tag.name for tag in codebase.tags.all().order_by("name")] or None,
-            license=release.license.url if release.license else None,
-            publisher=cls.COMSES_ORGANIZATION,
+            license=(
+                cls.license_to_creative_work(release.license)
+                if release.license
+                else None
+            ),
             version=release.version_number,
-            description=codebase.description.raw,
             url=release.permanent_url,
             embargoEndDate=release.embargo_end_date,
-            referencePublication=codebase.associated_publication_text or None,
         )
 
     @classmethod
-    def _convert_release_minimal(cls, release) -> CodeMeta:
-        codebase = release.codebase
+    def _convert_codebase_minimal(cls, codebase) -> CodeMeta:
         return CodeMeta(
             type_="SoftwareSourceCode",
             name=codebase.title,
@@ -184,18 +239,20 @@ def convert_release(cls, release) -> CodeMeta:
         except Exception as e:
             # in case something goes horribly wrong, log the error and return a valid but
             # minimal codemeta object
-            logger.exception("Error when generating codemeta: %s", e)
-            return cls._convert_release_minimal(release)
+            logger.exception(
+                f"Error when generating codemeta for release {release}: {e}"
+            )
+            return cls._convert_codebase_minimal(release.codebase)
 
     @classmethod
     def convert_codebase(cls, codebase) -> CodeMeta:
-        # TODO: finish this, should extract common stuff from create_release
-        return CodeMeta(
-            type_="SoftwareSourceCode",
-            id_=codebase.permanent_url,
-            name=codebase.title,
-            publisher=cls.COMSES_ORGANIZATION,
-        )
+        try:
+            return cls._convert_codebase(codebase)
+        except Exception as e:
+            logger.exception(
+                f"Error when generating codemeta for codebase {codebase}: {e}"
+            )
+            return cls._convert_codebase_minimal(codebase)
 
 
 class CitationFileFormatConverter:
@@ -205,47 +262,118 @@ class CitationFileFormatConverter:
     def convert_release(
         cls, release, codemeta: CodeMeta | dict = None
     ) -> CitationFileFormat:
-        if not codemeta:
-            codemeta = CodeMetaConverter.convert_release(release)
-        elif isinstance(codemeta, dict):
-            try:
-                codemeta = CodeMeta(**codemeta)
-            except:
-                codemeta = None
+        codemeta = coerce_codemeta(codemeta, release=release)
         return convert("codemeta", "cff", codemeta)
 
 
 class DataCiteConverter:
     """Create datacite metadata objects that represent the metadata for a codebase or release."""
 
+    @classmethod
+    def get_formats(cls):
+        return ["text/plain"]
+
+    @classmethod
+    def to_related_identifier(
+        cls, related_identifier: str, relation_type: str, related_identifier_type="DOI"
+    ):
+        return {
+            "relatedIdentifier": related_identifier,
+            "relatedIdentifierType": related_identifier_type,
+            "relationType": relation_type,
+        }
+
+    @classmethod
+    def get_release_related_identifiers(cls, release):
+        related_identifiers = []
+        if release.codebase.doi:
+            related_identifiers.append(
+                cls.to_related_identifier(release.codebase.doi, "IsVersionOf")
+            )
+        previous_release = release.get_previous_release()
+        next_release = release.get_next_release()
+        # set relationship to previous_release
+        if previous_release and previous_release.doi:
+            related_identifiers.append(
+                cls.to_related_identifier(previous_release.doi, "IsNewVersionOf")
+            )
+
+        # set relationship to next_release
+        if next_release and next_release.doi:
+            related_identifiers.append(
+                cls.to_related_identifier(next_release.doi, "IsPreviousVersionOf")
+            )
+        return related_identifiers or None
+
+    @classmethod
+    def get_codebase_related_identifiers(cls, codebase):
+        # other identifiers to consider? (too many, prioritize which ones)
+        # IsReviewedBy, IsRequiredBy, IsDocumentedBy, IsReferencedBy, IsVariantOf, IsDerivedFrom, Obsoletes, IsObsoletedBy, IsCitedBy, IsSupplementTo, IsSupplementedBy
+        return [
+            cls.to_related_identifier(release.doi, "HasVersion")
+            for release in codebase.ordered_releases_list()
+            if release.doi
+        ] or None
+
+    @classmethod
+    def get_codebase_descriptions(cls, codebase):
+        return [
+            {
+                "description": codebase.description.raw,
+                "descriptionType": "Abstract",
+            },
+            {
+                "description": "The DOI pointing to this resource is a `concept version` representing all versions of this computational model and will always redirect to the latest version of this computational model. See https://zenodo.org/help/versioning for more details on the rationale behind a concept version DOI that rolls up all versions of a given computational model or any other digital research object.",
+                "descriptionType": "Other",
+            },
+        ]
+
     @classmethod
     def convert_release(cls, release, codemeta: CodeMeta | dict = None) -> DataCite:
-        if not codemeta:
-            codemeta = CodeMetaConverter.convert_release(release)
-        elif isinstance(codemeta, dict):
-            try:
-                codemeta = CodeMeta(**codemeta)
-            except:
-                codemeta = None
+        codemeta = coerce_codemeta(codemeta, release=release)
         # datacite always needs a publication date
         if not codemeta.datePublished:
             codemeta.datePublished = date.today()
-        # any additional fields that cannot be derived from codemeta
-        addl_datacite_fields = {}
-        return convert("codemeta", "datacite", codemeta, **addl_datacite_fields)
+        return convert(
+            "codemeta",
+            "datacite",
+            codemeta,
+            formats=cls.get_formats(),
+            relatedIdentifiers=cls.get_release_related_identifiers(release),
+        )
 
     @classmethod
     def convert_codebase(cls, codebase, codemeta: CodeMeta | dict = None) -> DataCite:
-        if not codemeta:
-            codemeta = CodeMetaConverter.convert_codebase(codebase)
-        elif isinstance(codemeta, dict):
-            try:
-                codemeta = CodeMeta(**codemeta)
-            except:
-                codemeta = None
+        codemeta = coerce_codemeta(codemeta, codebase=codebase)
         # datacite always needs a publication date
         if not codemeta.datePublished:
             codemeta.datePublished = date.today()
-        # any additional fields that cannot be derived from codemeta
-        addl_datacite_fields = {}
-        return convert("codemeta", "datacite", codemeta, **addl_datacite_fields)
+        return convert(
+            "codemeta",
+            "datacite",
+            codemeta,
+            # any additional fields that cannot be derived from codemeta
+            descriptions=cls.get_codebase_descriptions(codebase),
+            relatedIdentifiers=cls.get_codebase_related_identifiers(codebase),
+        )
+
+
+def coerce_codemeta(codemeta: dict | CodeMeta, codebase=None, release=None) -> CodeMeta:
+    """make sure that codemeta is a CodeMeta object.
+
+    A dict is validated into a CodeMeta object. If we didn't receive anything, or
+    the dict fails validation, try to re-generate it from the given codebase or
+    release so that callers are not handed None while a source object is available
+    """
+    if isinstance(codemeta, dict) and codemeta:
+        try:
+            codemeta = CodeMeta(**codemeta)
+        except Exception:
+            logger.exception("Invalid codemeta, falling back to re-generating it")
+            codemeta = None
+    if not codemeta:
+        if codebase:
+            codemeta = CodeMetaConverter.convert_codebase(codebase)
+        elif release:
+            codemeta = CodeMetaConverter.convert_release(release)
+    return codemeta
diff --git a/django/library/models.py b/django/library/models.py
index be8e173e5..84b64ab0c 100644
--- a/django/library/models.py
+++ b/django/library/models.py
@@ -654,11 +654,11 @@ def datacite(self):
         return DataCiteSchema.from_codebase(self)
 
     # FIXME: replace the above datacite metadata generation with this
-    # @property
-    # def datacite(self):
-    #     return DataCiteConverter.convert_codebase(
-    #         self, codemeta=self.codemeta_snapshot or None
-    #     )
+    @property
+    def datacite_temp(self):
+        return DataCiteConverter.convert_codebase(
+            self, codemeta=self.codemeta_snapshot or None
+        )
 
     @property
     def is_replication(self):
@@ -1636,11 +1636,6 @@ def bagit_info(self):
             # FIXME: check codemeta for additional metadata
         }
 
-    @cached_property
-    def common_metadata(self):
-        """Returns a CommonMetadata object used to build specific metadata objects: for example CodeMeta or DataCite"""
-        return CommonMetadata(self)
-
     @cached_property
     def datacite(self):
         if not self.live:
@@ -1650,11 +1645,15 @@ def datacite(self):
         return DataCiteSchema.from_release(self)
 
     # FIXME: replace the above datacite metadata generation with this
-    # @property
-    # def datacite_temp(self):
-    #     return DataCiteConverter.convert_release(
-    #         self, codemeta=self.codemeta_snapshot or None
-    #     )
+    @property
+    def datacite_temp(self):
+        if not self.live:
+            logger.warning(
+                "Attempting to generate datacite for an unpublished release: %s", self
+            )
+        return DataCiteConverter.convert_release(
+            self, codemeta=self.codemeta_snapshot or None
+        )
 
     @property
     def codemeta(self):
diff --git a/django/library/views.py b/django/library/views.py
index d05434223..ee389fd61 100644
--- a/django/library/views.py
+++ b/django/library/views.py
@@ -828,7 +828,8 @@ def contributors(self, request, **kwargs):
         )
         crs.is_valid(raise_exception=True)
         crs.save()
-        # trigger a re-generation of release codemeta by saving
+        # re-generate codemeta
+        codebase_release.codebase.save(rebuild_metadata=False)
         codebase_release.save()
         return Response(status=status.HTTP_204_NO_CONTENT)
 