diff --git a/Gemfile b/Gemfile index 848f9861..4414a9dd 100644 --- a/Gemfile +++ b/Gemfile @@ -42,6 +42,7 @@ end group :development do # Access an IRB console on exception pages or by using <%= console %> anywhere in the code. gem 'web-console', '>= 3.3.0' + gem 'byebug' end group :development, :test do diff --git a/Gemfile.lock b/Gemfile.lock index bbc24de2..17d35de9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -89,6 +89,7 @@ GEM bundler-audit (0.9.1) bundler (>= 1.2.0, < 3) thor (~> 1.0) + byebug (11.1.3) cancancan (3.5.0) capistrano (3.18.0) airbrussh (>= 1.0.0) @@ -440,6 +441,7 @@ PLATFORMS DEPENDENCIES addressable bootsnap (>= 1.1.0) + byebug cancancan capistrano (~> 3.0) capistrano-bundler diff --git a/lib/metadata/schema_dot_org.rb b/lib/metadata/schema_dot_org.rb index 10cfd2b8..e01b18b5 100644 --- a/lib/metadata/schema_dot_org.rb +++ b/lib/metadata/schema_dot_org.rb @@ -1,4 +1,7 @@ +require 'byebug' + module Metadata + # rubocop:disable Metrics/ClassLength class SchemaDotOrg def self.call(cocina_json) new(cocina_json).call @@ -17,23 +20,21 @@ def call "@context": 'http://schema.org', "@type": schema_type, "name": title_name, - "identifier": identifier, - "description": description, - "isAccessibleForFree": access, - "license": license, - "url": url, - "creator": creators - }.compact + "description": description + }.merge(format_specific_fields) + .compact end def schema_type? - dataset? + dataset? || video? end private def schema_type - 'Dataset' if dataset? + return 'Dataset' if dataset? + + 'Video' if video? end def dataset? @@ -44,15 +45,39 @@ def dataset? false end + def video? + # Only return video metadata if world-downloadable. + video = JsonPath.new("$.description.form[?(@['value'] == 'moving image' && @['type'] == 'resource type')]").on(@cocina_json) + return true if video.any? && access? + + false + end + def title_name # title.value or concatenated title.structuredValue 1) for title with status "primary" if present 2) for first title - # required for Datasets + # required for Datasets and Videos titles = JsonPath.new("$.description.title[?(@['status' == 'primary'])].structuredValue[*].value").on(@cocina_json) return titles.join('\n') unless titles.empty? JsonPath.new('$.description.title[0].value').first(@cocina_json) end + def format_specific_fields + if dataset? + return { "identifier": identifier, + "isAccessibleForFree": access?, + "license": license, + "url": url, + "creator": creators } + elsif video? + return { "thumbnailUrl": thumbnail, + # "uploadDate": upload_date, + "embedUrl": embed_url } + # "duration": duration + end + {} + end + def description # description.note where type=summary or type=abstract, concatenating with \n if multiple # required for Datasets @@ -75,7 +100,7 @@ def identifier [URI.join('https://doi.org', identifier).to_s] end - def access + def access? # true if access.download = "world" return true if JsonPath.new("$.access[?(@['download'] == 'world')]").first(@cocina_json) @@ -140,5 +165,56 @@ def orcid(contributor) URI.join('https://orcid.org/', orcid).to_s end + + def embed_url + response = Faraday.get(iframe_url) + return unless response.success? + + iframe_html = Nokogiri::HTML(get_html(response.body)) + iframe_html.css('iframe')[0]['src'] + end + + def get_html(response_body) + JsonPath.new('$.html').first(response_body) + end + + def iframe_url + oembed_url_template.expand(format: 'json', url: embeddable_url).to_s + end + + def oembed_url_template + Addressable::Template.new(Settings.embed.url_template) + end + + def embeddable_url + format(Settings.embed.url, druid: bare_druid) + end + + def bare_druid + druid.split('druid:').last + end + + def druid + JsonPath.new('$.externalIdentifier').first(@cocina_json) + end + + def thumbnail + # required for Videos + # structural.contains.filename with hasMimeType = "image/jp2" where structural.contains has type https://cocina.sul.stanford.edu/models/resources/video", + video = JsonPath.new("$.structural.contains[*][?(@['type'] == 'https://cocina.sul.stanford.edu/models/resources/video')]").on(@cocina_json) + filename = JsonPath.new("$[*].structural.contains[*][?(@['hasMimeType'] == 'image/jp2')].filename").first(video) + return if filename.blank? + + URI.join(Settings.stacks.url, "file/#{druid}/#{filename}").to_s + end + + def upload_date + # required for Videos + # event.date.value or event.date.structuredValue.value with event.date.type "publication" and event.date.status "primary" + # first event.date.value or event.date.structuredValue.value with event.date.type "publication" + # first event.date.value or event.date.structuredValue.value with event.type "publication" and event.date.type null + # first event.date.value or event.date.structuredValue with event.type null and event.date.type null + end end + # rubocop:enable Metrics/ClassLength end diff --git a/spec/lib/metadata/schema_dot_org_spec.rb b/spec/lib/metadata/schema_dot_org_spec.rb index 731fa939..43e88218 100644 --- a/spec/lib/metadata/schema_dot_org_spec.rb +++ b/spec/lib/metadata/schema_dot_org_spec.rb @@ -26,7 +26,7 @@ end end - context 'without a dataset genre' do + context 'without a dataset or media form' do let(:cocina_json) do <<~JSON { @@ -41,9 +41,40 @@ JSON end - it 'does not have type of Dataset' do - expect(schema_dot_org).not_to include( - "@type": 'Dataset' + it 'does not have type' do + expect(schema_dot_org).not_to have_key('@type') + end + end + + context 'with a moving image resource type' do + let(:cocina_json) do + <<~JSON + { + "externalIdentifier": "druid:hj293cv5980", + "label": "A video about robots", + "description": { + "form": [{ "value": "moving image", + "type": "resource type" }] + }, + "access": {"download": "world"} + } + JSON + end + let(:body) do + { 'type' => 'rich', 'version' => '1.0', 'provider_name' => 'SUL Embed Service', 'title' => 'Oral history', 'height' => 400, 'width' => nil, + 'html' => '' } + .to_json + end + + before do + stub_request(:get, 'https://embed.stanford.edu/embed.json?url=https://purl.stanford.edu/hj293cv5980') + .to_return(status: 200, body:) + end + + it 'has type of Video' do + expect(schema_dot_org).to include( + "@context": 'http://schema.org', + "@type": 'Video' ) end end @@ -82,6 +113,8 @@ ], "status": "primary"} ], + "form": [{ "value": "dataset", + "type": "genre" }], "identifier": [] } } @@ -100,7 +133,9 @@ <<~JSON { "description": { - "note": [{"type": "abstract", "value": "About this dataset"}] + "note": [{"type": "abstract", "value": "About this item"}], + "form": [{ "value": "dataset", + "type": "genre" }] } } JSON @@ -108,7 +143,7 @@ it 'includes the description' do expect(schema_dot_org).to include( - "description": 'About this dataset' + "description": 'About this item' ) end end @@ -118,7 +153,9 @@ <<~JSON { "description": { - "note": [{"type": "summary", "value": "About this dataset"}] + "note": [{"type": "summary", "value": "About this dataset"}], + "form": [{"value": "dataset", + "type": "genre" }] } } JSON @@ -136,7 +173,9 @@ <<~JSON { "description": { - "title": [{"value": "My Dataset"}] + "title": [{"value": "My Dataset"}], + "form": [{"value": "dataset", + "type": "genre" }] } } JSON @@ -148,319 +187,460 @@ ) end end + + context 'with a Dataset' do + context 'with DOI in identification' do + let(:cocina_json) do + <<~JSON + { + "description": { + "title": [{"value": "My Dataset"}], + "form": [{"value": "dataset", + "type": "genre" }] + }, + "identification": {"doi": "10.25740/hj293cv5980"} + } + JSON + end - context 'with DOI in identification' do - let(:cocina_json) do - <<~JSON - { - "description": { - "title": [{"value": "My Dataset"}] - }, - "identification": {"doi": "10.25740/hj293cv5980"} - } - JSON + it 'includes the DOI' do + expect(schema_dot_org).to include( + "identifier": ['https://doi.org/10.25740/hj293cv5980'] + ) + end end - it 'includes the DOI' do - expect(schema_dot_org).to include( - "identifier": ['https://doi.org/10.25740/hj293cv5980'] - ) - end - end + context 'with DOI in identifier uri' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "identifier": [ + { "uri": "https://doi.org/10.25740/hj293cv5980" } + ] + } + } + JSON + end - context 'with DOI in identifier uri' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "identifier": [ - { "uri": "https://doi.org/10.25740/hj293cv5980" } - ] - } - } - JSON + it 'includes the DOI' do + expect(schema_dot_org).to include( + "identifier": ['https://doi.org/10.25740/hj293cv5980'] + ) + end end - it 'includes the DOI' do - expect(schema_dot_org).to include( - "identifier": ['https://doi.org/10.25740/hj293cv5980'] - ) - end - end + context 'with DOI in identifier value' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "identifier": [ + { "value": "https://doi.org/10.25740/hj293cv5980", + "type": "doi" } + ] + } + } + JSON + end - context 'with DOI in identifier value' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "identifier": [ - { "value": "https://doi.org/10.25740/hj293cv5980", - "type": "doi" } - ] - } - } - JSON + it 'includes the DOI' do + expect(schema_dot_org).to include( + "identifier": ['https://doi.org/10.25740/hj293cv5980'] + ) + end end - it 'includes the DOI' do - expect(schema_dot_org).to include( - "identifier": ['https://doi.org/10.25740/hj293cv5980'] - ) - end - end + context 'with no identifiers' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "identifier": [] + } + } + JSON + end - context 'with no identifiers' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "identifier": [] - } - } - JSON + it 'includes no identifier' do + expect(schema_dot_org).not_to have_key('identifier') + end end - it 'includes no identifier' do - expect(schema_dot_org).not_to have_key('identifier') - end - end + context 'when world downloadable' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }] + }, + "access": {"download": "world"} + } + JSON + end - context 'when world downloadable' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }] - }, - "access": {"download": "world"} - } - JSON + it 'is accessibleForFree' do + expect(schema_dot_org).to include( + "isAccessibleForFree": true + ) + end end - it 'is accessibleForFree' do - expect(schema_dot_org).to include( - "isAccessibleForFree": true - ) - end - end + context 'when not world downloadable' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }] + }, + "access": {"download": "stanford"} + } + JSON + end - context 'when not world downloadable' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }] - }, - "access": {"download": "stanford"} - } - JSON + it 'is not isAccessibleForFree' do + expect(schema_dot_org).to include( + "isAccessibleForFree": false + ) + end end - it 'is not isAccessibleForFree' do - expect(schema_dot_org).to include( - "isAccessibleForFree": false - ) - end - end + context 'with a license' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }] + }, + "access": {"license": "https://opendatacommons.org/licenses/by/1-0/"} + } + JSON + end - context 'with a license' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }] - }, - "access": {"license": "https://opendatacommons.org/licenses/by/1-0/"} - } - JSON + it 'includes the license' do + expect(schema_dot_org).to include( + "license": 'https://opendatacommons.org/licenses/by/1-0/' + ) + end end - it 'includes the license' do - expect(schema_dot_org).to include( - "license": 'https://opendatacommons.org/licenses/by/1-0/' - ) - end - end + context 'with a purl' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "purl": "https://purl.stanford.edu/hj293cv5980" + } + } + JSON + end - context 'with a purl' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "purl": "https://purl.stanford.edu/hj293cv5980" - } - } - JSON + it 'includes a url' do + expect(schema_dot_org).to include( + "url": 'https://purl.stanford.edu/hj293cv5980' + ) + end end - it 'includes a url' do - expect(schema_dot_org).to include( - "url": 'https://purl.stanford.edu/hj293cv5980' - ) - end - end + context 'with contributors in a name' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "contributor": [{"name": {"value": "Doe, Jane"}}, + {"name": {"value": "Foo, John"}}] + } + } + JSON + end - context 'with contributors in a name' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "contributor": [{"name": {"value": "Doe, Jane"}}, - {"name": {"value": "Foo, John"}}] - } - } - JSON + it 'includes creator' do + expect(schema_dot_org).to include( + "creator": [{ + "@type": 'Person', + "name": 'Doe, Jane' + }, { + "@type": 'Person', + "name": 'Foo, John' + }] + ) + end end - it 'includes creator' do - expect(schema_dot_org).to include( - "creator": [{ - "@type": 'Person', - "name": 'Doe, Jane' - }, { - "@type": 'Person', - "name": 'Foo, John' - }] - ) - end - end + context 'with contributors in a structuredValue' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "contributor": [ + {"name": [ + { "structuredValue": [ + { "value": "Jane", + "type": "forename" }, + { "value": "Doe", + "type": "surname"}] + } + ]}, + {"name": [ + { "structuredValue": [ + { "value": "John", + "type": "forename"}, + { "value": "Foo", + "type": "surname"}] + } + ]} + ] + } + } + JSON + end + + it 'includes creator' do + expect(schema_dot_org).to include( + "creator": [ + { + "@type": 'Person', + "name": 'Jane Doe', + "givenName": 'Jane', + "familyName": 'Doe' + }, + { + "@type": 'Person', + "name": 'John Foo', + "givenName": 'John', + "familyName": 'Foo' + } + ] + ) + end + end + + context 'with an ORCID uri for the contributor' do + let(:cocina_json) do + <<~JSON + { + "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "contributor": [{"name": {"value": "Doe, Jane"}, + "identifier": {"uri": "https://orcid.org/0000-0000-0000-0000"}}] + } + } + JSON + end - context 'with contributors in a structuredValue' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "contributor": [ - {"name": [ - { "structuredValue": [ - { "value": "Jane", - "type": "forename" }, - { "value": "Doe", - "type": "surname"}] - } - ]}, - {"name": [ - { "structuredValue": [ - { "value": "John", - "type": "forename"}, - { "value": "Foo", - "type": "surname"}] - } - ]} - ] - } - } - JSON + it 'includes the ORCID' do + expect(schema_dot_org).to include( + "creator": [{ + "@type": 'Person', + "name": 'Doe, Jane', + "sameAs": 'https://orcid.org/0000-0000-0000-0000' + }] + ) + end end - it 'includes creator' do - expect(schema_dot_org).to include( - "creator": [ + context 'with an identifier of ORCID type for the contributor' do + let(:cocina_json) do + <<~JSON { + "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "contributor": [{"name": {"value": "Doe, Jane"}, + "identifier": {"value": "0000-0000-0000-0000", + "type": "ORCID"} + }] + } + } + JSON + end + + it 'includes the ORCID' do + expect(schema_dot_org).to include( + "creator": [{ + "@type": 'Person', + "name": 'Doe, Jane', + "sameAs": 'https://orcid.org/0000-0000-0000-0000' + }] + ) + end + end + + context 'with a structuredValue name and ORCID' do + let(:cocina_json) do + <<~JSON + { "description": { "form": [{ "value": "dataset", + "type": "genre" }], + "contributor": [ + { "name": [ + { "structuredValue": [ + { "value": "Jane", + "type": "forename" }, + { "value": "Doe", + "type": "surname"} + ] + } + ], + "identifier": {"uri": "https://orcid.org/0000-0000-0000-0000"} + }] + } + } + JSON + end + + it 'includes the ORCID' do + expect(schema_dot_org).to include( + "creator": [{ "@type": 'Person', "name": 'Jane Doe', "givenName": 'Jane', - "familyName": 'Doe' - }, - { - "@type": 'Person', - "name": 'John Foo', - "givenName": 'John', - "familyName": 'Foo' - } - ] - ) + "familyName": 'Doe', + "sameAs": 'https://orcid.org/0000-0000-0000-0000' + }] + ) + end end end - context 'with an ORCID uri for the contributor' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "contributor": [{"name": {"value": "Doe, Jane"}, - "identifier": {"uri": "https://orcid.org/0000-0000-0000-0000"}}] - } - } - JSON + context 'with a Video' do + let(:body) do + { 'type' => 'rich', 'version' => '1.0', 'provider_name' => 'SUL Embed Service', 'title' => 'Oral history', 'height' => 400, 'width' => nil, + 'html' => '' } + .to_json end - it 'includes the ORCID' do - expect(schema_dot_org).to include( - "creator": [{ - "@type": 'Person', - "name": 'Doe, Jane', - "sameAs": 'https://orcid.org/0000-0000-0000-0000' - }] - ) + before do + allow(Settings.stacks).to receive(:url).and_return('https://example.com') + allow(Settings.embed).to receive(:url_template).and_return('https://embed.stanford.edu/embed{.format}{?url*,application_options*}') + allow(Settings.embed).to receive(:url).and_return('https://purl.stanford.edu/%{druid}') end - end - context 'with an identifier of ORCID type for the contributor' do - let(:cocina_json) do - <<~JSON - { - "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "contributor": [{"name": {"value": "Doe, Jane"}, - "identifier": {"value": "0000-0000-0000-0000", - "type": "ORCID"} - }] + context 'with a thumbnail' do + let(:cocina_json) do + <<~JSON + { + "externalIdentifier": "druid:tt618qz3245", + "description": { "form": [{ "value": "moving image", + "type": "resource type" }] + }, + "access": {"download": "world"}, + "structural": {"contains": [{"type": "https://cocina.sul.stanford.edu/models/resources/video", + "structural": {"contains": [{"filename": "tt618qz3245_thumb.jp2", + "hasMimeType": "image/jp2"}, + {"filename": "tt618qz3245_history_sl.mp4", + "hasMimeType": "video/mp4"}] + } + }] } - } - JSON + } + JSON + end + + before do + stub_request(:get, 'https://embed.stanford.edu/embed.json?url=https://purl.stanford.edu/tt618qz3245') + .to_return(status: 200, body:) + end + + it 'includes the thumbnail' do + expect(schema_dot_org).to include( + "thumbnailUrl": 'https://example.com/file/druid:tt618qz3245/tt618qz3245_thumb.jp2' + ) + end end - it 'includes the ORCID' do - expect(schema_dot_org).to include( - "creator": [{ - "@type": 'Person', - "name": 'Doe, Jane', - "sameAs": 'https://orcid.org/0000-0000-0000-0000' - }] - ) + context 'with no thumbnail' do + let(:cocina_json) do + <<~JSON + { + "externalIdentifier": "druid:tt618qz3245", + "description": { "form": [{ "value": "moving image", + "type": "resource type" }] + }, + "access": {"download": "world"}, + "structural": {"contains": [{"type": "https://cocina.sul.stanford.edu/models/resources/video", + "structural": {"contains": [{"filename": "tt618qz3245_history_sl.mp4", + "hasMimeType": "video/mp4"}] + } + }] + } + } + JSON + end + + before do + stub_request(:get, 'https://embed.stanford.edu/embed.json?url=https://purl.stanford.edu/tt618qz3245') + .to_return(status: 200, body:) + end + + it 'does not include a thumbnail' do + expect(schema_dot_org).not_to have_key('thumbnailUrl') + end end - end - context 'with a structuredValue name and ORCID' do - let(:cocina_json) do - <<~JSON - { "description": { "form": [{ "value": "dataset", - "type": "genre" }], - "contributor": [ - { "name": [ - { "structuredValue": [ - { "value": "Jane", - "type": "forename" }, - { "value": "Doe", - "type": "surname"} - ] - } - ], - "identifier": {"uri": "https://orcid.org/0000-0000-0000-0000"} - }] + context 'with an embeddable video' do + let(:cocina_json) do + <<~JSON + { + "externalIdentifier": "druid:tn153br1253", + "description": { "form": [{ "value": "moving image", + "type": "resource type" }] + }, + "access": {"download": "world"}, + "structural": {"contains": [{"type": "https://cocina.sul.stanford.edu/models/resources/video", + "contains": [{"filename": "tn153br1253_thumb.jp2", + "hasMimeType": "image/jp2"}, + {"filename": "tn153br1253_29U_Alak_Rita_Rinpoche_sl.mp4", + "hasMimeType": "video/mp4"}] + }] } - } - JSON + } + JSON + end + + before do + stub_request(:get, 'https://embed.stanford.edu/embed.json?url=https://purl.stanford.edu/tn153br1253') + .to_return(status: 200, body:) + end + + it 'includes the embed_url' do + expect(schema_dot_org).to include( + "embedUrl": 'https://embed.stanford.edu/iframe?url=https://purl.stanford.edu/tn153br1253&_v=1701092229' + ) + end end - it 'includes the ORCID' do - expect(schema_dot_org).to include( - "creator": [{ - "@type": 'Person', - "name": 'Jane Doe', - "givenName": 'Jane', - "familyName": 'Doe', - "sameAs": 'https://orcid.org/0000-0000-0000-0000' - }] - ) + context 'when sul-embed is unreachable' do + let(:cocina_json) do + <<~JSON + { + "externalIdentifier": "druid:tn153br1253", + "description": { "form": [{ "value": "moving image", + "type": "resource type" }] + }, + "access": {"download": "world"}, + "structural": {"contains": [{"type": "https://cocina.sul.stanford.edu/models/resources/video", + "contains": [{"filename": "tn153br1253_thumb.jp2", + "hasMimeType": "image/jp2"}, + {"filename": "tn153br1253_29U_Alak_Rita_Rinpoche_sl.mp4", + "hasMimeType": "video/mp4"}] + }] + } + } + JSON + end + + before do + stub_request(:get, 'https://embed.stanford.edu/embed.json?url=https%3A%2F%2Fpurl.stanford.edu%2Ftn153br1253').to_return(status: 404) + end + + it 'does not include the embedUrl' do + expect(schema_dot_org).not_to have_key('embedUrl') + end end end end diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb index bc88d48d..5d8a2392 100644 --- a/spec/rails_helper.rb +++ b/spec/rails_helper.rb @@ -6,6 +6,7 @@ # Add additional requires below this line. Rails is not loaded until this point! require 'capybara/rails' require 'selenium-webdriver' +require 'webmock/rspec' Capybara.javascript_driver = :selenium_chrome_headless