Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

schema.org markup for video objects #834

Merged
merged 1 commit into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ end
group :development do
# Access an IRB console on exception pages or by using <%= console %> anywhere in the code.
gem 'web-console', '>= 3.3.0'
gem 'byebug'
end

group :development, :test do
Expand Down
2 changes: 2 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ GEM
bundler-audit (0.9.1)
bundler (>= 1.2.0, < 3)
thor (~> 1.0)
byebug (11.1.3)
cancancan (3.5.0)
capistrano (3.18.0)
airbrussh (>= 1.0.0)
Expand Down Expand Up @@ -442,6 +443,7 @@ PLATFORMS
DEPENDENCIES
addressable
bootsnap (>= 1.1.0)
byebug
cancancan
capistrano (~> 3.0)
capistrano-bundler
Expand Down
153 changes: 124 additions & 29 deletions lib/metadata/schema_dot_org.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
module Metadata
# rubocop:disable Metrics/ClassLength
class SchemaDotOrg
def self.call(cocina_json)
new(cocina_json).call
Expand All @@ -13,75 +14,96 @@ def initialize(cocina_json)
end

def call
{
"@context": 'http://schema.org',
{ "@context": 'http://schema.org',
"@type": schema_type,
"name": title_name,
"identifier": identifier,
"description": description,
"isAccessibleForFree": access,
"license": license,
"url": url,
"creator": creators
}.compact
"description": description }
.merge(format_specific_fields)
.compact
end

def schema_type?
dataset?
dataset? || render_video_metadata?
end

private

def schema_type
'Dataset' if dataset?
return 'Dataset' if dataset?

'VideoObject' if render_video_metadata?
end

def dataset?
# has a form with value of dataset and type of genre
dataset = JsonPath.new("$.description.form[?(@['value'] == 'dataset' && @['type'] == 'genre')]").on(@cocina_json)
return true if dataset.any?
dataset.any?
end

false
# Decides whether VideoObject metadata should be rendered.
# Requires a video-typed resource in structural.contains, and both the
# object-level and the video-file-level download checks to pass.
def render_video_metadata?
  video_resources = JsonPath.new("$.structural.contains[?(@['type'] == 'https://cocina.sul.stanford.edu/models/resources/video')]").on(@cocina_json)
  # No video resource at all: nothing to render.
  return false unless video_resources.any?

  # Only return video metadata if world-downloadable.
  object_access? && video_access?
end

def title_name
# title.value or concatenated title.structuredValue 1) for title with status "primary" if present 2) for first title
# required for Datasets
# required for Datasets and Videos
titles = JsonPath.new("$.description.title[?(@['status' == 'primary'])].structuredValue[*].value").on(@cocina_json)
return titles.join('\n') unless titles.empty?
return titles.join(': ') unless titles.empty?

JsonPath.new('$.description.title[0].value').first(@cocina_json)
end

# Builds the fields that depend on the kind of object being described.
# Datasets take precedence over videos; any other object contributes no
# extra fields (an empty hash).
def format_specific_fields
  if dataset?
    {
      identifier: identifier,
      isAccessibleForFree: object_access?,
      license: license,
      url: url,
      creator: creators
    }
  elsif render_video_metadata?
    {
      thumbnailUrl: thumbnail,
      uploadDate: upload_date,
      embedUrl: embed_url
    }
  else
    {}
  end
end

def description
# description.note where type=summary or type=abstract, concatenating with \n if multiple
# required for Datasets
notes = JsonPath.new("$.description.note[?(@['type'] == 'summary' || @['type'] == 'abstract')].value").on(@cocina_json)
return notes.join('\n') unless notes.empty?

# provide title (or other text?) in description if relevant note is missing
title_name
notes.join('\n') unless notes.empty?
end

def identifier
# identification.doi or identifier.uri or identifier.value with type "doi" (case-insensitive), made into URI if identifier only
identifier = JsonPath.new('$.identification.doi').first(@cocina_json) ||
JsonPath.new('$.description.identifier..uri').first(@cocina_json) ||
JsonPath.new("$.description.identifier[?(@['type'] == 'doi')].value").first(@cocina_json)
return unless identifier
# identification.doi or identifier.uri including doi.org or identifier.value with type "doi" (case-insensitive), made into URI if identifier only
doi_id = JsonPath.new('$.identification.doi').first(@cocina_json) ||
JsonPath.new("$.description.identifier[?(@['type'] == 'doi')].value").first(@cocina_json) ||
JsonPath.new("$.description.identifier[?(@['uri'] =~ /doi/)].uri").first(@cocina_json)
return unless doi_id

return [identifier] if identifier.start_with?('https://doi.org')
return [doi_id] if doi_id.start_with?('https://doi.org')

[URI.join('https://doi.org', identifier).to_s]
[URI.join('https://doi.org', doi_id).to_s]
end

def access
# Object-level download check.
# @return [Boolean] true if access.download = "world", false otherwise
def object_access?
  world_download = JsonPath.new("$.access[?(@['download'] == 'world')]").first(@cocina_json)
  world_download ? true : false
end

# File-level download check for the video.
# @return [Boolean] true when the first matching video file's
#   access.download value is "world"
def video_access?
  video_resources = JsonPath.new("$.structural.contains[*][?(@['type'] == 'https://cocina.sul.stanford.edu/models/resources/video')]").on(@cocina_json)
  # Locate the file that is the video itself (by mime-type) and read its
  # download right.
  download_right = JsonPath.new('$[*].structural.contains[*][?(@.hasMimeType =~ /video/)].access.download').first(video_resources)

  download_right == 'world'
end

# Reads access.license from the Cocina JSON (first match, or nil).
def license
  license_path = JsonPath.new('$.access.license')
  license_path.first(@cocina_json)
end
Expand Down Expand Up @@ -130,8 +152,8 @@ def family_name(contributor)

def orcid(contributor)
# contributor.identifier.uri or contributor.identifier.value with type "orcid" (case-insensitive), made into URI if identifier only
id_uri = JsonPath.new('$.identifier.uri').first(contributor)
return id_uri if id_uri.present?
identifier = JsonPath.new('$.identifier.uri').first(contributor)
return identifier if identifier.present?

orcid = JsonPath.new("$.identifier.[?(@['type'] == 'ORCID' || @['type'] == 'orcid')].value").first(contributor)
return if orcid.blank?
Expand All @@ -140,5 +162,78 @@ def orcid(contributor)

URI.join('https://orcid.org/', orcid).to_s
end

# Expands the iframe URL template with this object's embeddable URL and
# returns the result as a String.
def embed_url
  expanded = iframe_url_template.expand(url: embeddable_url)
  expanded.to_s
end

# Wraps the configured iframe URL template string
# (Settings.embed.iframe.url_template) in an Addressable::Template.
def iframe_url_template
  template_string = Settings.embed.iframe.url_template
  Addressable::Template.new(template_string)
end

# Fills the configured embed URL format string (Settings.embed.url) with
# the bare druid as its named `druid` value.
def embeddable_url
  url_format = Settings.embed.url
  format(url_format, druid: bare_druid)
end

# The druid with a leading "druid:" prefix removed (unchanged when the
# prefix is absent).
def bare_druid
  prefixed_druid = druid
  prefixed_druid.delete_prefix('druid:')
end

# Reads externalIdentifier from the Cocina JSON (first match, or nil).
def druid
  identifier_path = JsonPath.new('$.externalIdentifier')
  identifier_path.first(@cocina_json)
end

# Thumbnail URL for a video object (required for Videos).
# Uses structural.contains.filename with hasMimeType = "image/jp2" where
# structural.contains has type
# "https://cocina.sul.stanford.edu/models/resources/video".
# Returns nil when no such filename is found.
def thumbnail
  video_resources = JsonPath.new("$.structural.contains[*][?(@['type'] == 'https://cocina.sul.stanford.edu/models/resources/video')]").on(@cocina_json)
  jp2_filename = JsonPath.new("$[*].structural.contains[*][?(@['hasMimeType'] == 'image/jp2')].filename").first(video_resources)
  return if jp2_filename.blank?

  stacks_base = Settings.stacks.url
  URI.join(stacks_base, "file/#{druid}/#{jp2_filename}").to_s
end

# Upload date for a video object (required for Videos).
# Candidates are tried in order and the first truthy one wins:
#   1. event.date.value with date type "publication" and status "primary"
#   2. event.date.structuredValue.value with the same type and status
#   3. event.date.value with date type "publication"
#   4. event.date.structuredValue.value with date type "publication"
#   5. the no_date_type fallback, then the no_event_type fallback
def upload_date
  events = JsonPath.new('$.description.event[*]').on(@cocina_json)

  publication_date_paths = [
    "$[*].date[*][?(@['type'] == 'publication' && @['status'] == 'primary')].value",
    "$[*].date[*][?(@['type'] == 'publication' && @['status'] == 'primary')].structuredValue[*].value",
    "$[*].date[*][?(@['type'] == 'publication')].value",
    "$[*].date[*][?(@['type'] == 'publication')].structuredValue[*].value"
  ]
  publication_date_paths.each do |path|
    found = JsonPath.new(path).first(events)
    return found if found
  end

  no_date_type(events) || no_event_type(events)
end

# Fallback date lookup: first event.date.value or
# event.date.structuredValue.value with event.type "publication" and
# event.date.type null. Returns nil when there are no events or no
# candidate dates.
def no_date_type(events)
  return unless events.any?

  # NOTE(review): both filters below contain a ")" directly after @['type'],
  # which looks unbalanced; confirm how JsonPath parses these expressions.
  plain_dates = JsonPath.new("$.[?(@['type']) == 'publication')].date[*].[?(@['value'])]").on(events)
  structured_dates = JsonPath.new("$.[?(@['type']) == 'publication')].date[*].structuredValue[*]").on(events)
  candidates = plain_dates + structured_dates
  return unless candidates.any?

  date_value(candidates)
end

# Fallback date lookup: first event.date.value or event.date.structuredValue
# with event.type null and event.date.type null.
#
# @param events [Array<Hash>] event hashes; left unmodified
# @return the value picked by date_value, or nil when there are no events
#   or no dates on events lacking a 'type' key
def no_event_type(events)
  return unless events.any?

  # Filter into a new array rather than select!-ing in place, so the
  # caller's events array is not destructively narrowed.
  untyped_events = events.reject { |event| event.key?('type') }
  dates = JsonPath.new('$[*].date[*]').on(untyped_events)
  return unless dates.any?

  date_value(dates)
end

# Picks the value of the first date that has no 'type' key.
#
# @param dates [Array<Hash>] date hashes; left unmodified
# @return the 'value' entry of the first untyped date, or nil when every
#   date has a 'type' key (or the list is empty)
# @raise [KeyError] if the first untyped date has no 'value' key
def date_value(dates)
  # find instead of select! + first: same result, without mutating the
  # caller's array (and without failing on a frozen one).
  untyped_date = dates.find { |date| !date.key?('type') }
  return unless untyped_date

  untyped_date.fetch('value')
end
end
# rubocop:enable Metrics/ClassLength
end
Loading