From cd0459f40b94d6a1e09063487d77a745a7bc62d2 Mon Sep 17 00:00:00 2001 From: Justin Littman Date: Wed, 22 Nov 2023 07:23:27 -0500 Subject: [PATCH] Abstract out description. --- Gemfile | 1 + Gemfile.lock | 7 ++ app/models/purl_resource.rb | 26 ++++---- lib/description.rb | 88 ++++++++++++++++++++++++++ lib/description/cocina_contributor.rb | 81 ++++++++++++++++++++++++ lib/description/cocina_identifier.rb | 31 +++++++++ lib/description/cocina_note.rb | 22 +++++++ lib/description/cocina_title.rb | 25 ++++++++ lib/description/mods_formatted_name.rb | 40 ++++++++++++ lib/description/mods_identifier.rb | 23 +++++++ lib/description/mods_origin_info.rb | 25 ++++++++ lib/metadata/schema_dot_org.rb | 82 ++++-------------------- spec/model/purl_resource_spec.rb | 13 ++++ 13 files changed, 383 insertions(+), 81 deletions(-) create mode 100644 lib/description.rb create mode 100644 lib/description/cocina_contributor.rb create mode 100644 lib/description/cocina_identifier.rb create mode 100644 lib/description/cocina_note.rb create mode 100644 lib/description/cocina_title.rb create mode 100644 lib/description/mods_formatted_name.rb create mode 100644 lib/description/mods_identifier.rb create mode 100644 lib/description/mods_origin_info.rb diff --git a/Gemfile b/Gemfile index 49b2a280..446a9b16 100644 --- a/Gemfile +++ b/Gemfile @@ -10,6 +10,7 @@ gem 'bootsnap', '>= 1.1.0', require: false # Reduces boot times through caching; gem 'cancancan' # authorization gem 'config' # simple rails environment specific config gem "cssbundling-rails", "~> 1.1" +gem 'dry-struct' # immutable value objects gem 'faraday' # HTTP client gem "geo_coord", require: "geo/coord" gem 'honeybadger' # exception reporting diff --git a/Gemfile.lock b/Gemfile.lock index b82ea6bc..d6530a1a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -163,6 +163,11 @@ GEM dry-logic (>= 1.4, < 2) dry-types (>= 1.7, < 2) zeitwerk (~> 2.6) + dry-struct (1.6.0) + dry-core (~> 1.0, < 2) + dry-types (>= 1.7, < 2) + ice_nine (~> 
0.11) + zeitwerk (~> 2.6) dry-types (1.7.1) concurrent-ruby (~> 1.0) dry-core (~> 1.0) @@ -190,6 +195,7 @@ GEM htmlentities (4.3.4) i18n (1.14.1) concurrent-ruby (~> 1.0) + ice_nine (0.11.2) iiif-presentation (1.2.0) activesupport (>= 3.2.18) faraday (~> 2.7) @@ -445,6 +451,7 @@ DEPENDENCIES debug dlss-capistrano dor-rights-auth (~> 1.6) + dry-struct faraday geo_coord honeybadger diff --git a/app/models/purl_resource.rb b/app/models/purl_resource.rb index e2567e09..b12d44c6 100644 --- a/app/models/purl_resource.rb +++ b/app/models/purl_resource.rb @@ -151,6 +151,7 @@ def schema_dot_org? concerning :Metadata do def title if mods? + # This is from ModsDisplay::HTML Array.wrap(mods.title).join(' -- ') else public_xml.title @@ -161,6 +162,7 @@ def description return unless mods? @description ||= begin + # This is from ModsDisplay::HTML abstract = mods.abstract.detect { |a| a.respond_to? :values } if abstract abstract.values.join.strip @@ -212,6 +214,8 @@ def use_and_reproduction delegate :released_to?, to: :public_xml + delegate :doi, :doi_id, to: :desc + def representative_thumbnail? representative_thumbnail.present? end @@ -220,22 +224,12 @@ def representative_thumbnail "#{iiif_manifest.thumbnail_base_uri}/full/!400,400/0/default.jpg" if iiif_manifest.thumbnail_base_uri.present? 
end - # @return [String,nil] DOI (with https://doi.org/ prefix) if present - def doi - @doi ||= mods_ng_document.root&.at_xpath('mods:identifier[@type="doi"]', mods: MODS_NS)&.text - end - - # @return [String,nil] DOI (without https://doi.org/ prefix) if present - def doi_id - doi&.delete_prefix('https://doi.org/') - end - def publication_date - @publication_date ||= ::Metadata::PublicationDate.call(mods_ng_document) + desc.publication_year end def authors - @authors ||= ::Metadata::Authors.call(mods_ng_document) + desc.formatted_contributors end def schema_dot_org @@ -325,6 +319,14 @@ def mods_ng_document @mods_ng_document ||= Nokogiri::XML(mods_body) end + def cocina_json + @cocina_json ||= cocina_body.present? ? JSON.parse(cocina_body) : nil + end + + def desc + @desc ||= Description.new(mods_ng: mods_ng_document, cocina_json:) + end + def logger Rails.logger end diff --git a/lib/description.rb b/lib/description.rb new file mode 100644 index 00000000..ed6f5a50 --- /dev/null +++ b/lib/description.rb @@ -0,0 +1,88 @@ +# require 'dry-struct' + +class Description + MODS_NS = 'http://www.loc.gov/mods/v3'.freeze + + # Temporarily accepting nils to avoid fixing all tests. + def initialize(mods_ng: nil, cocina_json: nil) + @mods_ng = mods_ng + @cocina_json = cocina_json + end + + # Conventions: + # * Prefer cocina naming. + # * "formatted_" indicates that a complex structure has been reduced to a string. + + delegate :doi, :doi_id, to: :mods_identifier + delegate :publication_year, to: :mods_origin_info + delegate :formatted_title, to: :cocina_title + delegate :descriptions, :formatted_description, to: :cocina_note + delegate :contributors, to: :cocina_contributor + + def formatted_contributors + # Temporary workaround to avoid fixing all tests. + # Otherwise, would be: delegate :formatted_contributors, to: :cocina_contributor + cocina_json.present? ? 
cocina_contributor.formatted_contributors : mods_formatted_name.formatted_names + end + + def doi + # Temporary workaround to avoid fixing all tests. + # Otherwise, would be: delegate :doi, to: :cocina_identifier + cocina_json.present? ? cocina_identifier.doi : mods_identifier.doi + end + + def doi_id + # Temporary workaround to avoid fixing all tests. + # Otherwise, would be: delegate :doi_id, to: :cocina_identifier + cocina_json.present? ? cocina_identifier.doi_id : mods_identifier.doi_id + end + + private + + attr_reader :mods_ng, :cocina_json + + def cocina_identifier + @cocina_identifier ||= CocinaIdentifier.new(cocina_json:) + end + + def mods_identifier + @mods_identifier ||= ModsIdentifier.new(mods_ng:) + end + + def mods_origin_info + @mods_origin_info ||= ModsOriginInfo.new(mods_ng:) + end + + def mods_formatted_name + @mods_formatted_name ||= ModsFormattedName.new(mods_ng:) + end + + def cocina_title + @cocina_title ||= CocinaTitle.new(cocina_json:) + end + + def cocina_note + @cocina_note ||= CocinaNote.new(cocina_json:) + end + + def cocina_contributor + @cocina_contributor ||= CocinaContributor.new(cocina_json:) + end + + module Types + include Dry.Types() + end + + # Base class for Structs + class DescriptionStruct < Dry::Struct + transform_keys(&:to_sym) + schema schema.strict + end + + class Contributor < DescriptionStruct + attribute :name, Types::String + attribute? :forename, Types::String + attribute? :surname, Types::String + attribute? 
:orcid, Types::String + end +end diff --git a/lib/description/cocina_contributor.rb b/lib/description/cocina_contributor.rb new file mode 100644 index 00000000..a3a0904a --- /dev/null +++ b/lib/description/cocina_contributor.rb @@ -0,0 +1,81 @@ +class Description + class CocinaContributor + def initialize(cocina_json:) + @cocina_json = cocina_json + end + + # @return [Array<Description::Contributor>] contributors + def contributors + @contributors ||= cocina_contributors.map { |cocina_contributor| contributor(cocina_contributor) } + end + + # @return [Array<String>] contributor names + def formatted_contributors + @formatted_contributors ||= contributors.map(&:name) + end + + private + + attr_reader :cocina_json + + def cocina_contributors + JsonPath.new('$.description.contributor[*]').on(@cocina_json) + end + + def contributor(cocina_contributor) + Description::Contributor.new( + **ContributorBuilder.new(cocina_contributor:).build + ) + end + + class ContributorBuilder + def initialize(cocina_contributor:) + @cocina_contributor = cocina_contributor + end + + def build + { name:, + forename:, + surname:, + orcid: }.compact + end + + private + + attr_reader :cocina_contributor + + def name + # contributor.name.value or concatenated contributor.name.structuredValue + JsonPath.new('$.name.value').first(cocina_contributor) || structured_name + end + + def structured_name + # concatenated contributor.name.structuredValue + [forename, surname].join(' ') + end + + def forename + # contributor.name.structuredValue.value with type "forename" + JsonPath.new("$.name[0].structuredValue[*].[?(@['type'] == 'forename')].value").first(cocina_contributor) + end + + def surname + # contributor.name.structuredValue.value with type "surname" + JsonPath.new("$.name[0].structuredValue[*].[?(@['type'] == 'surname')].value").first(cocina_contributor) + end + + def orcid + # contributor.identifier.uri or contributor.identifier.value with type "orcid" (case-insensitive), made into URI if identifier only + id_uri = 
JsonPath.new('$.identifier.uri').first(cocina_contributor) + return id_uri if id_uri.present? + + orcid = JsonPath.new("$.identifier.[?(@['type'] == 'ORCID' || @['type'] == 'orcid')].value").first(cocina_contributor) + return if orcid.blank? + + return orcid if orcid.start_with?('https://orcid.org') + + URI.join('https://orcid.org/', orcid).to_s + end + end + end +end diff --git a/lib/description/cocina_identifier.rb b/lib/description/cocina_identifier.rb new file mode 100644 index 00000000..663d3ffa --- /dev/null +++ b/lib/description/cocina_identifier.rb @@ -0,0 +1,31 @@ +class Description + class CocinaIdentifier + def initialize(cocina_json:) + @cocina_json = cocina_json + end + + # identification.doi or identifier.uri or identifier.value with type "doi" (exact match on lowercase "doi" only), made into URI if identifier only + # @return [String,nil] DOI (with https://doi.org/ prefix) if present + def doi + @doi ||= begin + identifier = JsonPath.new('$.identification.doi').first(@cocina_json) || + JsonPath.new('$.description.identifier..uri').first(@cocina_json) || + JsonPath.new("$.description.identifier[?(@['type'] == 'doi')].value").first(@cocina_json) + if identifier&.start_with?('https://doi.org') + identifier + elsif identifier + URI.join('https://doi.org', identifier).to_s + end + end + end + + # @return [String,nil] DOI (without https://doi.org/ prefix) if present + def doi_id + doi&.delete_prefix('https://doi.org/') + end + + private + + attr_reader :cocina_json + end +end diff --git a/lib/description/cocina_note.rb b/lib/description/cocina_note.rb new file mode 100644 index 00000000..f649f871 --- /dev/null +++ b/lib/description/cocina_note.rb @@ -0,0 +1,22 @@ +class Description + class CocinaNote + def initialize(cocina_json:) + @cocina_json = cocina_json + end + + # value for description.note where type=summary or type=abstract + # @return [Array] description notes + def descriptions + @descriptions ||= JsonPath.new("$.description.note[?(@['type'] == 'summary' || 
@['type'] == 'abstract')].value").on(cocina_json) + end + + # @return [String, nil] formatted description + def formatted_description(delimiter: '\n') + @formatted_description ||= descriptions.join(delimiter) unless descriptions.empty? + end + + private + + attr_reader :cocina_json + end +end diff --git a/lib/description/cocina_title.rb b/lib/description/cocina_title.rb new file mode 100644 index 00000000..c294145d --- /dev/null +++ b/lib/description/cocina_title.rb @@ -0,0 +1,25 @@ +class Description + class CocinaTitle + def initialize(cocina_json:) + @cocina_json = cocina_json + end + + # Concatenated title.structuredValue for title with status "primary" if present + # Otherwise, title.value for first title + # @return [String, nil] formatted title + def formatted_title(delimiter: '\n') + @formatted_title ||= begin + titles = JsonPath.new("$.description.title[?(@['status' == 'primary'])].structuredValue[*].value").on(cocina_json) + if titles.present? + titles.join(delimiter) + else + JsonPath.new('$.description.title[0].value').first(cocina_json) + end + end + end + + private + + attr_reader :cocina_json + end +end diff --git a/lib/description/mods_formatted_name.rb b/lib/description/mods_formatted_name.rb new file mode 100644 index 00000000..b509532c --- /dev/null +++ b/lib/description/mods_formatted_name.rb @@ -0,0 +1,40 @@ +class Description + class ModsFormattedName + def initialize(mods_ng:) + @mods_ng = mods_ng + end + + # Names with author roles. + # Otherwise, names without roles. + # Otherwise, first name with any role. + # Names are formatted as a string with ModsDisplay::NameFormatter. 
+ # @return [Array] formatted names + def formatted_names + @formatted_names ||= name_elements.map { |name_element| ModsDisplay::NameFormatter.format(name_element) } + end + + private + + attr_reader :mods_ng + + def name_elements + names_with_author_roles.to_a.presence \ + || names_without_roles.to_a.presence \ + || [first_name_with_any_role].compact.presence \ + || [] + end + + def names_with_author_roles + mods_ng.root&.xpath('mods:name[mods:role/mods:roleTerm[contains(text(), "AUT") ' \ + 'or contains(text(), "aut") or contains(text(), "author") or contains(text(), "Author")]]', mods: MODS_NS) + end + + def names_without_roles + mods_ng.root&.xpath('mods:name[count(mods:role) = 0]', mods: MODS_NS) + end + + def first_name_with_any_role + mods_ng.root&.at_xpath('mods:name[mods:role]', mods: MODS_NS) + end + end +end diff --git a/lib/description/mods_identifier.rb b/lib/description/mods_identifier.rb new file mode 100644 index 00000000..0d9a5f0f --- /dev/null +++ b/lib/description/mods_identifier.rb @@ -0,0 +1,23 @@ +class Description + class ModsIdentifier + MODS_NS = 'http://www.loc.gov/mods/v3'.freeze + + def initialize(mods_ng:) + @mods_ng = mods_ng + end + + # @return [String,nil] DOI (with https://doi.org/ prefix) if present + def doi + @doi ||= mods_ng.root&.at_xpath('mods:identifier[@type="doi"]', mods: MODS_NS)&.text + end + + # @return [String,nil] DOI (without https://doi.org/ prefix) if present + def doi_id + doi&.delete_prefix('https://doi.org/') + end + + private + + attr_reader :mods_ng + end +end diff --git a/lib/description/mods_origin_info.rb b/lib/description/mods_origin_info.rb new file mode 100644 index 00000000..1367ec84 --- /dev/null +++ b/lib/description/mods_origin_info.rb @@ -0,0 +1,25 @@ +class Description + class ModsOriginInfo + def initialize(mods_ng:) + @mods_ng = mods_ng + end + + # Year from publication originInfo with a dateIssued + # Otherwise, year from first originInfo with a dateIssued + # @return [String,nil] four-digit year 
if present + def publication_year + @publication_year ||= begin + date_element = mods_ng.root&.at_xpath('mods:originInfo[@eventType="publication" ' \ + 'or @eventType="Publication" or @eventType="PUBLICATION"]/mods:dateIssued', mods: MODS_NS) + date_element ||= mods_ng.root&.at_xpath('mods:originInfo/mods:dateIssued', mods: MODS_NS) + if (matcher = date_element&.text&.match(/(\d{4})/)) + matcher[1] + end + end + end + + private + + attr_reader :mods_ng + end +end diff --git a/lib/metadata/schema_dot_org.rb b/lib/metadata/schema_dot_org.rb index 10cfd2b8..9397c4e6 100644 --- a/lib/metadata/schema_dot_org.rb +++ b/lib/metadata/schema_dot_org.rb @@ -32,6 +32,10 @@ def schema_type? private + def desc + @desc ||= Description.new(cocina_json: @cocina_json) + end + def schema_type 'Dataset' if dataset? end @@ -45,34 +49,15 @@ def dataset? end def title_name - # title.value or concatenated title.structuredValue 1) for title with status "primary" if present 2) for first title - # required for Datasets - titles = JsonPath.new("$.description.title[?(@['status' == 'primary'])].structuredValue[*].value").on(@cocina_json) - return titles.join('\n') unless titles.empty? - - JsonPath.new('$.description.title[0].value').first(@cocina_json) + desc.formatted_title end def description - # description.note where type=summary or type=abstract, concatenating with \n if multiple - # required for Datasets - notes = JsonPath.new("$.description.note[?(@['type'] == 'summary' || @['type'] == 'abstract')].value").on(@cocina_json) - return notes.join('\n') unless notes.empty? - - # provide title (or other text?) 
in description if relevant note is missing - title_name + desc.formatted_description || title_name end def identifier - # identification.doi or identifier.uri or identifier.value with type "doi" (case-insensitive), made into URI if identifier only - identifier = JsonPath.new('$.identification.doi').first(@cocina_json) || - JsonPath.new('$.description.identifier..uri').first(@cocina_json) || - JsonPath.new("$.description.identifier[?(@['type'] == 'doi')].value").first(@cocina_json) - return unless identifier - - return [identifier] if identifier.start_with?('https://doi.org') - - [URI.join('https://doi.org', identifier).to_s] + [desc.doi].compact.presence end def access @@ -91,54 +76,13 @@ def url end def creators - # contributor.identifier.uri or contributor.identifier.value with type "orcid" (case-insensitive), made into URI if identifier only - creators = [] - contributors = JsonPath.new('$.description.contributor[*]').on(@cocina_json) - - contributors.each do |contributor| - creators.push( - { "@type": 'Person', - "name": creator_name(contributor), - "givenName": given_name(contributor), - "familyName": family_name(contributor), - "sameAs": orcid(contributor) }.compact - ) + desc.contributors.map do |contributor| + { "@type": 'Person', + "name": contributor.name, + "givenName": contributor.forename, + "familyName": contributor.surname, + "sameAs": contributor.orcid }.compact end - - creators - end - - def creator_name(contributor) - # contributor.name.value or concatenated contributor.name.structuredValue - JsonPath.new('$.name.value').first(contributor) || structured_name(contributor) - end - - def structured_name(contributor) - # concatenated contributor.name.structuredValue - [given_name(contributor), family_name(contributor)].join(' ') - end - - def given_name(contributor) - # contributor.name.structuredValue.value with type "forename" - JsonPath.new("$.name[0].structuredValue[*].[?(@['type'] == 'forename')].value").first(contributor) - end - - def 
family_name(contributor) - # contributor.name.structuredValue.value with type "surname" - JsonPath.new("$.name[0].structuredValue[*].[?(@['type'] == 'surname')].value").first(contributor) - end - - def orcid(contributor) - # contributor.identifier.uri or contributor.identifier.value with type "orcid" (case-insensitive), made into URI if identifier only - id_uri = JsonPath.new('$.identifier.uri').first(contributor) - return id_uri if id_uri.present? - - orcid = JsonPath.new("$.identifier.[?(@['type'] == 'ORCID' || @['type'] == 'orcid')].value").first(contributor) - return if orcid.blank? - - return orcid if orcid.start_with?('https://orcid.org') - - URI.join('https://orcid.org/', orcid).to_s end end end diff --git a/spec/model/purl_resource_spec.rb b/spec/model/purl_resource_spec.rb index e099af51..25f266e5 100644 --- a/spec/model/purl_resource_spec.rb +++ b/spec/model/purl_resource_spec.rb @@ -302,11 +302,18 @@ https://doi.org/10.25740/bb051dp0564 EOF + allow(subject).to receive(:cocina_body).and_return <<~JSON + { + "identification": {"doi": "10.25740/bb051dp0564"} + } + JSON end it 'returns the DOI' do expect(subject.doi).to eq 'https://doi.org/10.25740/bb051dp0564' expect(subject.doi_id).to eq '10.25740/bb051dp0564' + expect(subject).to have_received(:mods_body) + expect(subject).to have_received(:cocina_body).at_least(:once) end end @@ -317,11 +324,17 @@ EOF + allow(subject).to receive(:cocina_body).and_return <<~JSON + { + } + JSON end it 'returns nil' do expect(subject.doi).to be_nil expect(subject.doi_id).to be_nil + expect(subject).to have_received(:mods_body) + expect(subject).to have_received(:cocina_body).at_least(:once) end end end