Skip to content

Commit

Permalink
schema.org markup for video objects
Browse files Browse the repository at this point in the history
  • Loading branch information
lwrubel committed Dec 8, 2023
1 parent 7387bfc commit 97e4daf
Show file tree
Hide file tree
Showing 5 changed files with 850 additions and 306 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ end
group :development do
# Access an IRB console on exception pages or by using <%= console %> anywhere in the code.
gem 'web-console', '>= 3.3.0'
# Debugger; declared only in the :development group.
gem 'byebug'
end

group :development, :test do
Expand Down
2 changes: 2 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ GEM
bundler-audit (0.9.1)
bundler (>= 1.2.0, < 3)
thor (~> 1.0)
byebug (11.1.3)
cancancan (3.5.0)
capistrano (3.18.0)
airbrussh (>= 1.0.0)
Expand Down Expand Up @@ -442,6 +443,7 @@ PLATFORMS
DEPENDENCIES
addressable
bootsnap (>= 1.1.0)
byebug
cancancan
capistrano (~> 3.0)
capistrano-bundler
Expand Down
155 changes: 128 additions & 27 deletions lib/metadata/schema_dot_org.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
module Metadata
# rubocop:disable Metrics/ClassLength
class SchemaDotOrg
def self.call(cocina_json)
new(cocina_json).call
Expand All @@ -13,27 +14,24 @@ def initialize(cocina_json)
end

# Builds the schema.org hash for this object.
#
# Starts from the fields common to every supported type (@context, @type,
# name, description), merges in the type-specific fields, and removes every
# entry whose value is nil.
def call
  common_fields = {
    "@context": 'http://schema.org',
    "@type": schema_type,
    name: title_name,
    description: description
  }
  common_fields.merge(format_specific_fields).compact
end

# Whether schema.org markup is generated for this object: truthy when it is
# recognised as a dataset or as a video.
def schema_type?
  dataset? || video?
end

private

# The schema.org @type for this object: 'Dataset' for datasets,
# 'VideoObject' for videos, nil when it is neither. The dataset check wins
# when both apply.
def schema_type
  return 'Dataset' if dataset?

  'VideoObject' if video?
end

def dataset?
Expand All @@ -44,44 +42,74 @@ def dataset?
false
end

# Whether VideoObject markup should be produced: the object must contain at
# least one resource of the cocina "video" type and pass both download checks
# (access? and video_access?).
def video?
  # Only return video metadata if world-downloadable.
  video_resources = JsonPath.new("$.structural.contains[?(@['type'] == 'https://cocina.sul.stanford.edu/models/resources/video')]").on(@cocina_json)
  video_resources.any? && access? && video_access?
end

# The display name of the object; required for Datasets and Videos.
#
# Returns the structuredValue parts selected by the "primary" title query
# joined with ': ', or, when that query yields nothing, the value of the
# first title.
def title_name
  # NOTE(review): the filter below reads @['status' == 'primary'] whereas the
  # other filters in this class use the form @['key'] == 'value'. Confirm that
  # it really limits the match to the title whose status is "primary".
  titles = JsonPath.new("$.description.title[?(@['status' == 'primary'])].structuredValue[*].value").on(@cocina_json)
  return titles.join(': ') unless titles.empty?

  JsonPath.new('$.description.title[0].value').first(@cocina_json)
end

# Fields that only apply to one schema.org type.
#
# Datasets get identifier, isAccessibleForFree, license, url and creator;
# videos get thumbnailUrl, uploadDate and embedUrl; anything else gets an
# empty hash. The dataset check is made first.
def format_specific_fields
  if dataset?
    {
      identifier: identifier,
      isAccessibleForFree: access?,
      license: license,
      url: url,
      creator: creators
    }
  elsif video?
    {
      thumbnailUrl: thumbnail,
      uploadDate: upload_date,
      embedUrl: embed_url
    }
  else
    {}
  end
end

# Description text; required for Datasets.
#
# Collects the values of description.note entries whose type is "summary" or
# "abstract" and joins them with a newline. Returns nil when there is no such
# note.
def description
  notes = JsonPath.new("$.description.note[?(@['type'] == 'summary' || @['type'] == 'abstract')].value").on(@cocina_json)
  # Double quotes are required here: '\n' in single quotes is a literal
  # backslash followed by "n", not a line break.
  notes.join("\n") unless notes.empty?
end

# DOI of the object as a one-element array holding an https://doi.org URI,
# or nil when no DOI is found.
#
# Lookup order: identification.doi, then the value of a description
# identifier with type "doi", then the uri of a description identifier whose
# uri matches /doi/. A value that does not already start with
# https://doi.org is resolved against that base.
def identifier
  # NOTE(review): the type filter matches the lowercase string 'doi' only;
  # confirm whether other casings can occur in the data.
  doi_id = JsonPath.new('$.identification.doi').first(@cocina_json) ||
           JsonPath.new("$.description.identifier[?(@['type'] == 'doi')].value").first(@cocina_json) ||
           JsonPath.new("$.description.identifier[?(@['uri'] =~ /doi/)].uri").first(@cocina_json)
  return unless doi_id
  return [doi_id] if doi_id.start_with?('https://doi.org')

  [URI.join('https://doi.org', doi_id).to_s]
end

# true if access.download = "world", false otherwise.
def access?
  world_download = JsonPath.new("$.access[?(@['download'] == 'world')]").first(@cocina_json)
  world_download ? true : false
end

# true when the first video-mimetype file found inside the video resources
# has access.download equal to "world", false otherwise.
def video_access?
  video_resources = JsonPath.new("$.structural.contains[*][?(@['type'] == 'https://cocina.sul.stanford.edu/models/resources/video')]").on(@cocina_json)
  # need to find the file that is the one for the video (based on mime-type). Then get the access and download rights for that.
  download_right = JsonPath.new('$[*].structural.contains[*][?(@.hasMimeType =~ /video/)].access.download').first(video_resources)
  download_right == 'world'
end

# First value found at access.license in the cocina JSON (nil if absent).
def license
  license_path = JsonPath.new('$.access.license')
  license_path.first(@cocina_json)
end
Expand Down Expand Up @@ -130,8 +158,8 @@ def family_name(contributor)

def orcid(contributor)
# contributor.identifier.uri or contributor.identifier.value with type "orcid" (case-insensitive), made into URI if identifier only
id_uri = JsonPath.new('$.identifier.uri').first(contributor)
return id_uri if id_uri.present?
identifier = JsonPath.new('$.identifier.uri').first(contributor)
return identifier if identifier.present?

orcid = JsonPath.new("$.identifier.[?(@['type'] == 'ORCID' || @['type'] == 'orcid')].value").first(contributor)
return if orcid.blank?
Expand All @@ -140,5 +168,78 @@ def orcid(contributor)

URI.join('https://orcid.org/', orcid).to_s
end

# String form of the iframe URL template expanded with the embeddable URL
# as its `url` variable.
def embed_url
  expanded = iframe_url_template.expand(url: embeddable_url)
  expanded.to_s
end

# Addressable template built from the Settings.embed.iframe.url_template value.
def iframe_url_template
  pattern = Settings.embed.iframe.url_template
  Addressable::Template.new(pattern)
end

# Settings.embed.url format string filled in with the bare druid as `druid`.
def embeddable_url
  url_pattern = Settings.embed.url
  format(url_pattern, druid: bare_druid)
end

# The druid with a leading "druid:" removed, if it has one.
def bare_druid
  prefixed = druid
  prefixed.delete_prefix('druid:')
end

# First value found at externalIdentifier in the cocina JSON.
def druid
  external_id_path = JsonPath.new('$.externalIdentifier')
  external_id_path.first(@cocina_json)
end

# Thumbnail URL; required for Videos.
#
# Takes the filename of the first image/jp2 file found inside the video
# resources and resolves "file/<druid>/<filename>" against
# Settings.stacks.url. Returns nil when no such filename is present.
def thumbnail
  # structural.contains.filename with hasMimeType = "image/jp2" where structural.contains has type https://cocina.sul.stanford.edu/models/resources/video",
  video_resources = JsonPath.new("$.structural.contains[*][?(@['type'] == 'https://cocina.sul.stanford.edu/models/resources/video')]").on(@cocina_json)
  jp2_filename = JsonPath.new("$[*].structural.contains[*][?(@['hasMimeType'] == 'image/jp2')].filename").first(video_resources)
  return if jp2_filename.blank?

  file_path = "file/#{druid}/#{jp2_filename}"
  URI.join(Settings.stacks.url, file_path).to_s
end

# Upload date; required for Videos.
#
# Tries, in order, over the description events:
#   1. date value, then structuredValue value, for dates with type
#      "publication" and status "primary"
#   2. date value, then structuredValue value, for dates with type "publication"
#   3. no_date_type, then no_event_type
# and returns the first truthy result (nil when none is found).
def upload_date
  events = JsonPath.new('$.description.event[*]').on(@cocina_json)
  publication_date_paths = [
    "$[*].date[*][?(@['type'] == 'publication' && @['status'] == 'primary')].value",
    "$[*].date[*][?(@['type'] == 'publication' && @['status'] == 'primary')].structuredValue[*].value",
    "$[*].date[*][?(@['type'] == 'publication')].value",
    "$[*].date[*][?(@['type'] == 'publication')].structuredValue[*].value"
  ]
  publication_date_paths.each do |path|
    found = JsonPath.new(path).first(events)
    return found if found
  end

  no_date_type(events) || no_event_type(events)
end

# first event.date.value or event.date.structuredValue.value with event.type "publication" and event.date.type null
#
# Gathers the dates (those carrying a value, plus structuredValue entries) of
# events whose type is "publication" and hands them to date_value, which
# picks the first one without a type. Returns nil when there are no events
# or no candidate dates.
def no_date_type(events)
  return unless events.any?

  # The comparison must sit inside the filter's parentheses:
  # [?(@['type'] == 'publication')].
  dates = JsonPath.new("$.[?(@['type'] == 'publication')].date[*].[?(@['value'])]").on(events)
  structured_dates = JsonPath.new("$.[?(@['type'] == 'publication')].date[*].structuredValue[*]").on(events)
  candidates = dates + structured_dates
  return unless candidates.any?

  date_value(candidates)
end

# first event.date.value or event.date.structuredValue with event.type null and event.date.type null
#
# Looks only at events that have no 'type' key, collects their dates and
# hands them to date_value. Returns nil when there are no events or no dates.
# The events array passed in is left unmodified.
def no_event_type(events)
  return unless events.any?

  untyped_events = events.reject { |event| event.key?('type') }
  dates = JsonPath.new('$[*].date[*]').on(untyped_events)
  return unless dates.any?

  date_value(dates)
end

# Value of the first date hash that has no 'type' key, or nil when every
# date is typed (or the list is empty). The dates array passed in is left
# unmodified. Raises KeyError if the selected date has no 'value' key.
def date_value(dates)
  untyped = dates.find { |date| !date.key?('type') }
  untyped&.fetch('value')
end
end
# rubocop:enable Metrics/ClassLength
end
Loading

0 comments on commit 97e4daf

Please sign in to comment.