Skip to content

Commit

Permalink
Merge pull request #15 from MITLibraries/engx244-lookups
Browse files Browse the repository at this point in the history
Adds data lookup for detected standard identifiers
  • Loading branch information
JPrevost authored Nov 20, 2023
2 parents c766a83 + 9364206 commit 6a925ab
Show file tree
Hide file tree
Showing 26 changed files with 1,095 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .env.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
LINKRESOLVER_BASEURL=https://mit.primo.exlibrisgroup.com/discovery/openurl?institution=01MIT_INST&rfr_id=info:sid/mit.tacos.api&vid=01MIT_INST:MIT
UNPAYWALL_EMAIL=[email protected]
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
# Ignore all environment files (except templates).
/.env*
!/.env*.erb
# Include test env file
!/.env.test

# Ignore all logfiles and tempfiles.
/log/*
Expand Down
8 changes: 8 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ gem 'bootsnap', require: false
# Ruby GraphQL implementation [https://github.com/rmosolgo/graphql-ruby]
gem 'graphql'

# HTTP is an easy-to-use client library for making requests from Ruby [https://github.com/httprb/http]
gem 'http'

# Use JavaScript with ESM import maps [https://github.com/rails/importmap-rails]
gem 'importmap-rails'

Expand Down Expand Up @@ -54,6 +57,9 @@ gem 'tzinfo-data', platforms: %i[windows jruby]
group :development, :test do
# See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
gem 'debug', platforms: %i[mri windows]

# Allow selective loading of configuration in different contexts (dev/test)
gem 'dotenv-rails'
end

group :development do
Expand Down Expand Up @@ -81,4 +87,6 @@ group :test do
gem 'selenium-webdriver'
gem 'simplecov'
gem 'simplecov-lcov'
gem 'vcr'
gem 'webmock'
end
36 changes: 36 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -98,22 +98,43 @@ GEM
xpath (~> 3.2)
concurrent-ruby (1.2.2)
connection_pool (2.4.1)
crack (0.4.5)
rexml
crass (1.0.6)
date (3.3.4)
debug (1.8.0)
irb (>= 1.5.0)
reline (>= 0.3.1)
docile (1.4.0)
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
dotenv (2.8.1)
dotenv-rails (2.8.1)
dotenv (= 2.8.1)
railties (>= 3.2)
drb (2.2.0)
ruby2_keywords
erubi (1.12.0)
ffi (1.16.3)
ffi-compiler (1.0.1)
ffi (>= 1.0.0)
rake
globalid (1.2.1)
activesupport (>= 6.1)
graphiql-rails (1.9.0)
railties
sprockets-rails
graphql (2.1.6)
racc (~> 1.4)
hashdiff (1.0.1)
http (5.1.1)
addressable (~> 2.8)
http-cookie (~> 1.0)
http-form_data (~> 2.2)
llhttp-ffi (~> 0.4.0)
http-cookie (1.0.5)
domain_name (~> 0.5)
http-form_data (2.3.0)
i18n (1.14.1)
concurrent-ruby (~> 1.0)
importmap-rails (1.2.3)
Expand All @@ -129,6 +150,9 @@ GEM
activesupport (>= 5.0.0)
json (2.6.3)
language_server-protocol (3.17.0.3)
llhttp-ffi (0.4.0)
ffi-compiler (~> 1.0)
rake (~> 13.0)
loofah (2.22.0)
crass (~> 1.0.2)
nokogiri (>= 1.12.0)
Expand Down Expand Up @@ -268,12 +292,20 @@ GEM
railties (>= 6.0.0)
tzinfo (2.0.6)
concurrent-ruby (~> 1.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.8.2)
unicode-display_width (2.5.0)
vcr (6.2.0)
web-console (4.2.1)
actionview (>= 6.0.0)
activemodel (>= 6.0.0)
bindex (>= 0.4.0)
railties (>= 6.0.0)
webmock (3.19.1)
addressable (>= 2.8.0)
crack (>= 0.3.2)
hashdiff (>= 0.4.0, < 2.0.0)
webrick (1.8.1)
websocket (1.2.10)
websocket-driver (0.7.6)
Expand All @@ -293,8 +325,10 @@ DEPENDENCIES
bootsnap
capybara
debug
dotenv-rails
graphiql-rails
graphql
http
importmap-rails
jbuilder
puma (>= 5.0)
Expand All @@ -310,7 +344,9 @@ DEPENDENCIES
stimulus-rails
turbo-rails
tzinfo-data
vcr
web-console
webmock

RUBY VERSION
ruby 3.2.2p53
Expand Down
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
# tacos
# tacos

## Required Environment Variables

`LINKRESOLVER_BASEURL`: base url for our link resolver. `https://mit.primo.exlibrisgroup.com/discovery/openurl?institution=01MIT_INST&rfr_id=info:sid/mit.tacos.api&vid=01MIT_INST:MIT` is probably the best value unless you are doing something interesting.

`UNPAYWALL_EMAIL`: email address to include in Unpaywall API calls, as required by the [Unpaywall documentation](https://unpaywall.org/products/api). Your personal email is appropriate for development. For deployed instances and for tests, use the timdex moira list email.
25 changes: 25 additions & 0 deletions app/graphql/types/details_type.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# frozen_string_literal: true

module Types
  # GraphQL object type describing the metadata returned by an external
  # lookup for a detected standard identifier. The underlying object is a
  # Hash keyed by symbols (it is read with `@object[:key]`).
  class DetailsType < Types::BaseObject
    field :title, String
    field :authors, [String]
    field :date, String
    field :publisher, String
    field :oa, Boolean
    field :oa_status, String
    field :best_oa_location, String
    field :issns, [String]
    field :journal_name, String
    field :doi, String
    field :link_resolver_url, String

    # Splits the comma-separated `:journal_issns` string into a list.
    #
    # @return [Array<String>, nil] nil when no ISSNs were supplied
    def issns
      @object[:journal_issns]&.split(',')
    end

    # Splits the `:authors` string into individual names.
    #
    # LookupIsbn joins author names with ' ; ', so the string is split on
    # semicolons and surrounding whitespace is stripped. Splitting on commas
    # would return the whole list as a single entry and would break names
    # written as "Last, First".
    #
    # @return [Array<String>, nil] nil when no authors were supplied
    def authors
      @object[:authors]&.split(';')&.map(&:strip)&.reject(&:empty?)
    end
  end
end
16 changes: 16 additions & 0 deletions app/graphql/types/standard_identifiers_type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,21 @@ module Types
class StandardIdentifiersType < Types::BaseObject
  field :kind, String, null: false
  field :value, String, null: false
  field :details, DetailsType

  # Resolves the `details` field by handing the identifier to the lookup
  # class that matches its kind. Every lookup calls an external service, so
  # this should only run when the field has been explicitly requested.
  # Kinds without a matching lookup resolve to nil.
  def details
    identifier = @object[:value]

    case @object[:kind]
    when :doi then LookupDoi.new.info(identifier)
    when :isbn then LookupIsbn.new.info(identifier)
    when :issn then LookupIssn.new.info(identifier)
    # Only the last whitespace-separated token of the value is looked up.
    when :pmid then LookupPmid.new.info(identifier.split.last)
    end
  end
end
end
50 changes: 50 additions & 0 deletions app/models/lookup_doi.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# frozen_string_literal: true

# Looks up metadata for a DOI using the Unpaywall API and builds a link
# resolver URL from the result.
class LookupDoi
  # Fetches and normalizes metadata for a DOI.
  #
  # @param doi [String] the DOI to look up
  # @return [Hash, nil] metadata keyed by symbol (including :doi and
  #   :link_resolver_url), or nil when the lookup did not return a 200
  def info(doi)
    external_data = fetch(doi)
    return if external_data == 'Error'

    metadata = extract_metadata(external_data)
    metadata[:doi] = doi
    metadata[:link_resolver_url] = link_resolver_url(metadata)
    metadata
  end

  private

  # Maps the parsed Unpaywall response onto the keys used by this app.
  #
  # NOTE: authors are available as objects within `z_authors` but that is
  # somewhat complicated so wasn't implemented during this initial work.
  #
  # NOTE(review): Unpaywall appears to document `best_oa_location` as an
  # object rather than a string, while DetailsType exposes it as a String.
  # Confirm which value (e.g. its url) is intended here.
  def extract_metadata(external_data)
    {
      genre: external_data['genre'],
      title: external_data['title'],
      date: external_data['year'],
      publisher: external_data['publisher'],
      oa: external_data['is_oa'],
      oa_status: external_data['oa_status'],
      best_oa_location: external_data['best_oa_location'],
      journal_issns: external_data['journal_issns'],
      journal_name: external_data['journal_name']
    }
  end

  # Unpaywall lookup URL for the DOI; requires UNPAYWALL_EMAIL to be set.
  def url(doi)
    "https://api.unpaywall.org/v2/#{doi}?email=#{ENV.fetch('UNPAYWALL_EMAIL')}"
  end

  # Requests the DOI record and parses the JSON body.
  #
  # @return [Hash, String] the parsed response, or the sentinel 'Error' for
  #   any non-200 status
  def fetch(doi)
    resp = HTTP.headers(accept: 'application/json').get(url(doi))
    if resp.status == 200
      JSON.parse(resp.to_s)
    else
      Rails.logger.debug("Fact lookup error. DOI #{doi} detected but unpaywall returned no data or otherwise errored")
      Rails.logger.debug("URL: #{url(doi)}")
      'Error'
    end
  end

  # Builds the link resolver URL; requires LINKRESOLVER_BASEURL to be set.
  #
  # The publication year is stored under :date by extract_metadata, so that
  # is the key read for `rft.date` (reading :year always produced an empty
  # value).
  #
  # NOTE(review): values are interpolated without URL-encoding; a title
  # containing `&` or `#` would likely break the query string.
  def link_resolver_url(metadata)
    "#{ENV.fetch('LINKRESOLVER_BASEURL')}&rft.atitle=#{metadata[:title]}&rft.date=#{metadata[:date]}&rft.genre=#{metadata[:genre]}&rft.jtitle=#{metadata[:journal_name]}&rft_id=info:doi/#{metadata[:doi]}"
  end
end
53 changes: 53 additions & 0 deletions app/models/lookup_isbn.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# frozen_string_literal: true

# Looks up metadata for an ISBN using the Open Library API and builds a link
# resolver URL for it.
class LookupIsbn
  # Fetches and normalizes metadata for an ISBN.
  #
  # @param isbn [String] the ISBN to look up
  # @return [Hash, nil] metadata keyed by symbol, or nil when the ISBN
  #   lookup did not return a 200
  def info(isbn)
    json = fetch_isbn(isbn)
    return if json == 'Error'

    {
      title: json['title'],
      date: json['publish_date'],
      # A record may have no `publishers` key; leave the publisher nil
      # instead of raising NoMethodError.
      publisher: json['publishers']&.join(','),
      authors: fetch_authors(json),
      link_resolver_url: link_resolver_url(isbn)
    }
  end

  # Root URL shared by the ISBN and author requests.
  def base_url
    'https://openlibrary.org'
  end

  # Requests the record for the ISBN.
  #
  # @return [Hash, String] parsed JSON, or the sentinel 'Error'
  def fetch_isbn(isbn)
    url = [base_url, "/isbn/#{isbn}.json"].join
    parse_response(url)
  end

  # Resolves each author key in the ISBN record to a name with one extra
  # request per author.
  #
  # Authors whose lookup fails, or whose record has no name, are skipped so
  # the joined string contains no empty entries.
  #
  # @return [String, nil] names joined with ' ; ', or nil when the record
  #   lists no authors
  def fetch_authors(isbn_json)
    return unless isbn_json['authors']

    author_names = isbn_json['authors'].filter_map do |author|
      json = parse_response([base_url, author['key'], '.json'].join)
      json['name'] unless json == 'Error'
    end
    author_names.join(' ; ')
  end

  # GETs the URL (following redirects) and parses the JSON body.
  #
  # @return [Hash, String] parsed JSON, or the sentinel 'Error' for any
  #   non-200 status
  def parse_response(url)
    resp = HTTP.headers(accept: 'application/json', 'Content-Type': 'application/json').follow.get(url)

    if resp.status == 200
      JSON.parse(resp.to_s)
    else
      Rails.logger.debug('Fact lookup error: openlibrary returned no data')
      Rails.logger.debug("URL: #{url}")
      'Error'
    end
  end

  # Builds the link resolver URL; requires LINKRESOLVER_BASEURL to be set.
  def link_resolver_url(isbn)
    "#{ENV.fetch('LINKRESOLVER_BASEURL')}&rft.isbn=#{isbn}"
  end
end
43 changes: 43 additions & 0 deletions app/models/lookup_issn.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# frozen_string_literal: true

# Looks up journal metadata for an ISSN using the Crossref API and builds a
# link resolver URL for it.
#
# LookupIssn assumes the ISSN being supplied has been validated prior to this Class being used.
# In this application, we only LookupIssns that have been detected in StandardIdentifiers which performs
# that validation for us. If extracting this logic to be used elsewhere, it is highly recommended to validate
# ISSNs before doing an external lookup.
class LookupIssn
  # Fetches and normalizes metadata for an ISSN.
  #
  # @param issn [String] a previously validated ISSN
  # @return [Hash, nil] metadata keyed by symbol (including
  #   :link_resolver_url), or nil when the lookup did not return a 200
  def info(issn)
    json = fetch(issn)
    return if json == 'Error'

    metadata = extract_metadata(json)
    metadata[:link_resolver_url] = openurl(issn)
    metadata
  end

  # Maps the parsed response onto the keys used by this app.
  #
  # A missing `message` object or `ISSN` list yields nil values instead of
  # raising NoMethodError.
  def extract_metadata(response)
    message = response['message'] || {}
    {
      journal_name: message['title'],
      publisher: message['publisher'],
      journal_issns: message['ISSN']&.join(',')
    }
  end

  # Crossref journal lookup URL for the ISSN.
  def url(issn)
    "https://api.crossref.org/journals/#{issn}"
  end

  # Requests the journal record and parses the JSON body.
  #
  # @return [Hash, String] the parsed response, or the sentinel 'Error' for
  #   any non-200 status
  def fetch(issn)
    resp = HTTP.headers(accept: 'application/json').get(url(issn))
    if resp.status == 200
      JSON.parse(resp.to_s)
    else
      Rails.logger.debug("ISSN Lookup error. ISSN #{issn} detected but crossref returned no data")
      Rails.logger.debug("URL: #{url(issn)}")
      'Error'
    end
  end

  # Builds the link resolver URL; requires LINKRESOLVER_BASEURL to be set.
  def openurl(issn)
    "#{ENV.fetch('LINKRESOLVER_BASEURL')}&rft.issn=#{issn}"
  end
end
49 changes: 49 additions & 0 deletions app/models/lookup_pmid.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# frozen_string_literal: true

# Looks up article metadata for a PubMed ID using the NCBI efetch service
# and builds a link resolver URL from the result.
class LookupPmid
  # Fetches and normalizes metadata for a PMID.
  #
  # The "no data" check runs on the extracted fields only. It must happen
  # before :pmid and :link_resolver_url are added, because those two values
  # are always non-empty and would otherwise make the check pass every time.
  #
  # @param pmid [String] the PubMed ID to look up
  # @return [Hash, nil] metadata keyed by symbol (including :pmid and
  #   :link_resolver_url), or nil when the request failed or every
  #   extracted field was empty
  def info(pmid)
    xml = fetch(pmid)
    return if xml == 'Error'

    metadata = extract_metadata(xml)
    if metadata.values.all?(&:empty?)
      Rails.logger.debug("Fact lookup error. PMID #{pmid} detected but ncbi returned no data")
      return
    end

    metadata[:pmid] = pmid
    metadata[:link_resolver_url] = link_resolver_url(metadata)
    metadata
  end

  # Pulls the fields of interest out of the parsed XML document. Each value
  # is the text of the matching nodes, which is an empty string when nothing
  # matches.
  def extract_metadata(xml)
    {
      title: xml.xpath('//ArticleTitle').text,
      journal_name: xml.xpath('//Journal/Title').text,
      journal_volume: xml.xpath('//Journal/JournalIssue/Volume').text,
      date: xml.xpath('//Journal/JournalIssue/PubDate/Year').text,
      doi: xml.xpath('//PubmedData/ArticleIdList/ArticleId[@IdType="doi"]').text
    }
  end

  # NCBI efetch URL for the PMID, requesting XML.
  def url(pmid)
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=#{pmid}&retmode=xml"
  end

  # Requests the PubMed record and parses the XML body.
  #
  # @return [Nokogiri::XML::Document, String] the parsed document, or the
  #   sentinel 'Error' for any non-200 status
  def fetch(pmid)
    resp = HTTP.headers(accept: 'application/xml').get(url(pmid))

    if resp.status == 200
      Nokogiri::XML(resp.to_s)
    else
      Rails.logger.debug("Fact lookup error. PMID #{pmid} detected but ncbi returned an error status")
      Rails.logger.debug("URL: #{url(pmid)}")
      'Error'
    end
  end

  # Builds the link resolver URL; requires LINKRESOLVER_BASEURL to be set.
  def link_resolver_url(metadata)
    "#{ENV.fetch('LINKRESOLVER_BASEURL')}&rft.atitle=#{metadata[:title]}&rft.date=#{metadata[:date]}&rft.jtitle=#{metadata[:journal_name]}&rft_id=info:doi/#{metadata[:doi]}"
  end
end
Loading

0 comments on commit 6a925ab

Please sign in to comment.