Skip to content

Commit

Permalink
schema.org markup for video objects
Browse files Browse the repository at this point in the history
  • Loading branch information
lwrubel committed Dec 5, 2023
1 parent 012dfa7 commit b6b8ef2
Show file tree
Hide file tree
Showing 6 changed files with 766 additions and 282 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ end
# Gems declared for the :development Bundler group only.
group :development do
# Access an IRB console on exception pages or by using <%= console %> anywhere in the code.
gem 'web-console', '>= 3.3.0'
# Debugger gem; no version constraint is given here.
gem 'byebug'
end

group :development, :test do
Expand Down
2 changes: 2 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ GEM
bundler-audit (0.9.1)
bundler (>= 1.2.0, < 3)
thor (~> 1.0)
byebug (11.1.3)
cancancan (3.5.0)
capistrano (3.18.0)
airbrussh (>= 1.0.0)
Expand Down Expand Up @@ -442,6 +443,7 @@ PLATFORMS
DEPENDENCIES
addressable
bootsnap (>= 1.1.0)
byebug
cancancan
capistrano (~> 3.0)
capistrano-bundler
Expand Down
129 changes: 118 additions & 11 deletions lib/metadata/schema_dot_org.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
module Metadata
# rubocop:disable Metrics/ClassLength
class SchemaDotOrg
def self.call(cocina_json)
new(cocina_json).call
Expand All @@ -17,23 +18,21 @@ def call
"@context": 'http://schema.org',
"@type": schema_type,
"name": title_name,
"identifier": identifier,
"description": description,
"isAccessibleForFree": access,
"license": license,
"url": url,
"creator": creators
}.compact
"description": description
}.merge(format_specific_fields)
.compact
end

# Reports whether schema.org markup can be produced for this object.
#
# @return [Boolean] true when either `dataset?` or `video?` holds
def schema_type?
  # A single expression: the earlier bare `dataset?` statement had its result
  # discarded and only repeated the lookup.
  dataset? || video?
end

private

# The schema.org @type for this object.
#
# @return [String, nil] 'Dataset' when `dataset?` holds, 'Video' when `video?`
#   holds, otherwise nil
def schema_type
  # Datasets take precedence over videos.
  return 'Dataset' if dataset?

  'Video' if video?
end

def dataset?
Expand All @@ -44,15 +43,38 @@ def dataset?
false
end

# Whether this object should be described as a schema.org video.
#
# Video metadata is only returned for world-downloadable objects, so the access
# check runs only once a "moving image" resource-type form has been found.
#
# @return [Boolean]
def video?
  form_query = "$.description.form[?(@['value'] == 'moving image' && @['type'] == 'resource type')]"
  moving_image_forms = JsonPath.new(form_query).on(@cocina_json)
  return false unless moving_image_forms.any?

  access? ? true : false
end

# Title used for the schema.org "name" property (required for Datasets and Videos).
#
# Uses the concatenated structuredValue parts returned by the primary-title
# query when there are any; otherwise falls back to the value of the first title.
#
# NOTE(review): the filter is written as @['status' == 'primary'] rather than
# @['status'] == 'primary' -- confirm it selects primary titles as intended.
#
# @return [String, nil]
def title_name
  titles = JsonPath.new("$.description.title[?(@['status' == 'primary'])].structuredValue[*].value").on(@cocina_json)
  # Double quotes are required here: '\n' in single quotes is a literal
  # backslash followed by "n", not a newline.
  return titles.join("\n") unless titles.empty?

  JsonPath.new('$.description.title[0].value').first(@cocina_json)
end

# Properties that apply to only one schema.org type.
#
# @return [Hash] the dataset properties when `dataset?` holds, the video
#   properties when `video?` holds, otherwise an empty hash
def format_specific_fields
  if dataset?
    {
      "identifier": identifier,
      "isAccessibleForFree": access?,
      "license": license,
      "url": url,
      "creator": creators
    }
  elsif video?
    {
      "thumbnailUrl": thumbnail,
      "uploadDate": upload_date,
      "embedUrl": embed_url
    }
  else
    {}
  end
end

def description
# description.note where type=summary or type=abstract, concatenating with \n if multiple
# required for Datasets
Expand All @@ -75,7 +97,7 @@ def identifier
[URI.join('https://doi.org', identifier).to_s]
end

def access
def access?
# true if access.download = "world"
return true if JsonPath.new("$.access[?(@['download'] == 'world')]").first(@cocina_json)

Expand Down Expand Up @@ -140,5 +162,90 @@ def orcid(contributor)

URI.join('https://orcid.org/', orcid).to_s
end

# URL of the embeddable player, read from the response fetched from iframe_url.
#
# Parses the markup extracted by get_html and returns the src attribute of its
# first <iframe>.
#
# @return [String, nil] the iframe src; nil when the request is unsuccessful or
#   the returned markup contains no iframe
def embed_url
  response = Faraday.get(iframe_url)
  return unless response.success?

  # A successful response may still carry markup without an <iframe>; indexing
  # nil for 'src' would raise NoMethodError, so return nil instead.
  iframe = Nokogiri::HTML(get_html(response.body)).css('iframe').first
  iframe && iframe['src']
end

# Pulls the "html" member out of a response body.
#
# @param response_body the body passed to JsonPath
# @return the first match for $.html
def get_html(response_body)
  html_query = JsonPath.new('$.html')
  html_query.first(response_body)
end

# URL requested to obtain the embed markup.
#
# @return [String] the oEmbed URL template expanded with format 'json' and the
#   embeddable URL
def iframe_url
  expanded = oembed_url_template.expand(format: 'json', url: embeddable_url)
  expanded.to_s
end

# Builds the URI template used for oEmbed requests.
#
# @return [Addressable::Template] template built from Settings.embed.url_template
def oembed_url_template
  template_class = Addressable::Template
  template_class.new(Settings.embed.url_template)
end

# URL of this object that is handed to the embed service.
#
# @return [String] Settings.embed.url with its `druid` reference filled in by
#   the bare druid
def embeddable_url
  url_pattern = Settings.embed.url
  format(url_pattern, druid: bare_druid)
end

# The druid without its leading "druid:" namespace.
#
# @return [String]
def bare_druid
  prefixed = druid
  prefixed.delete_prefix('druid:')
end

# The object's identifier as recorded in the Cocina JSON.
#
# @return the first match for $.externalIdentifier
def druid
  identifier_query = JsonPath.new('$.externalIdentifier')
  identifier_query.first(@cocina_json)
end

# Thumbnail URL for the video (required for Videos).
#
# Takes the filename of an image/jp2 file found among the structural.contains
# entries typed as video resources and resolves it against the stacks URL.
#
# @return [String, nil] nil when no such filename is found
def thumbnail
  video_query = "$.structural.contains[*][?(@['type'] == 'https://cocina.sul.stanford.edu/models/resources/video')]"
  video_resources = JsonPath.new(video_query).on(@cocina_json)

  jp2_query = "$[*].structural.contains[*][?(@['hasMimeType'] == 'image/jp2')].filename"
  jp2_filename = JsonPath.new(jp2_query).first(video_resources)
  return if jp2_filename.blank?

  stacks_base = Settings.stacks.url
  URI.join(stacks_base, "file/#{druid}/#{jp2_filename}").to_s
end

# Upload date for the video (required for Videos).
#
# Candidates are tried in order and the first hit wins:
# 1. event.date.value, then event.date.structuredValue.value, for dates with
#    type "publication" and status "primary"
# 2. the same two lookups for dates with type "publication"
# 3. the no_date_type fallback, then the no_event_type fallback
def upload_date
  events = JsonPath.new('$.description.event[*]').on(@cocina_json)

  publication_date_queries = [
    "$[*].date[*][?(@['type'] == 'publication' && @['status'] == 'primary')].value",
    "$[*].date[*][?(@['type'] == 'publication' && @['status'] == 'primary')].structuredValue[*].value",
    "$[*].date[*][?(@['type'] == 'publication')].value",
    "$[*].date[*][?(@['type'] == 'publication')].structuredValue[*].value"
  ]
  publication_date_queries.each do |query|
    found = JsonPath.new(query).first(events)
    return found if found
  end

  no_date_type(events) || no_event_type(events)
end

# Fallback date lookup: first event.date.value or event.date.structuredValue.value
# for events of type "publication" whose date carries no type.
#
# NOTE(review): both filters read ?(@['type']) == 'publication') with an extra
# closing parenthesis after @['type'] -- confirm they match publication events.
#
# @param events [Array] events taken from the description
# @return the result of date_value for the collected dates, or nil when there
#   are no events or no dates
def no_date_type(events)
  return if events.none?

  value_dates = JsonPath.new("$.[?(@['type']) == 'publication')].date[*].[?(@['value'])]").on(events)
  structured_dates = JsonPath.new("$.[?(@['type']) == 'publication')].date[*].structuredValue[*]").on(events)

  # Plain dates come first, followed by structured dates.
  candidates = value_dates + structured_dates
  return if candidates.none?

  date_value(candidates)
end

# Fallback date lookup: first event.date.value or event.date.structuredValue
# for events that carry no type and whose date carries no type.
#
# @param events [Array<Hash>] events taken from the description; left unmodified
# @return the result of date_value for the dates of untyped events, or nil when
#   there are no events or no such dates
def no_event_type(events)
  return unless events.any?

  # Filter into a new array rather than select!, which would strip typed
  # events from the caller's array.
  untyped_events = events.reject { |event| event.key?('type') }
  dates = JsonPath.new('$[*].date[*]').on(untyped_events)
  return unless dates.any?

  date_value(dates)
end

# Returns the value of the first date that has no 'type' key.
#
# Dates without a 'value' key are skipped, so an untyped date that only holds
# a structuredValue no longer raises KeyError.
#
# @param dates [Array<Hash>] candidate dates; left unmodified
# @return the 'value' of the first untyped date that has one, or nil
def date_value(dates)
  untyped = dates.find { |date| !date.key?('type') && date.key?('value') }
  untyped && untyped['value']
end
end
# rubocop:enable Metrics/ClassLength
end
Loading

0 comments on commit b6b8ef2

Please sign in to comment.