Skip to content

Commit

Permalink
normalize EAD ID values on index (ACFA-548)
Browse files Browse the repository at this point in the history
- prefix IDs that begin with a digit with 'cul-'
- replace leading 'ldpd_' with 'cul-'
- normalize non-alphanumeric characters, such as the ASpace concatenator '.', to '-'
- correct CatalogController#resolve for arclight indexing strategies and spec, see also ACFA-575
  • Loading branch information
barmintor committed Dec 23, 2024
1 parent 150d734 commit cb1c7f9
Show file tree
Hide file tree
Showing 19 changed files with 65 additions and 29 deletions.
10 changes: 7 additions & 3 deletions app/controllers/application_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ class ApplicationController < ActionController::Base

attr_accessor :authenticity_token

PREFIX_ID_CUL = 'cul-'
PREFIX_ID_LDPD = 'ldpd_'

private

# @as_repo_id => repo ID in ArchivesSpace
Expand All @@ -15,9 +18,10 @@ def validate_repository_code_and_set_repo_id
rescue ActiveRecord::RecordNotFound => e
Rails.logger.warn(e.message)
unless params[:id].blank?
bib_id = params[:id].delete_prefix('ldpd_')
Rails.logger.warn("redirect to CLIO with Bib ID #{bib_id}")
redirect_to CONFIG[:clio_redirect_url] + bib_id
clio_id = params[:id].delete_prefix(PREFIX_ID_LDPD)
clio_id.sub!(PREFIX_ID_CUL, '')
Rails.logger.warn("redirect to CLIO with Bib ID #{clio_id}")
redirect_to (CONFIG[:clio_redirect_url] + clio_id), allow_other_host: true
else
Rails.logger.warn("no Bib ID in url")
redirect_to '/'
Expand Down
8 changes: 5 additions & 3 deletions app/controllers/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -376,18 +376,20 @@ class CatalogController < ApplicationController

def resolve
if params[:id].present?
solr_id = (params[:id] =~ /^\d+$/) ? "ldpd_#{params[:id]}" : params[:id]
solr_id = (params[:id] =~ /^\d+$/) ? "#{PREFIX_ID_CUL}#{params[:id]}" : params[:id].dup
solr_id.sub!(PREFIX_ID_LDPD, PREFIX_ID_CUL)
clio_id = solr_id.delete_prefix(PREFIX_ID_CUL) if solr_id =~ /cul\-/
begin
@document = search_service.fetch(solr_id)
repo_id = @document&.fetch(:repository_id_ssi, nil)
if repo_id
redirect_to repository_finding_aid_path(repository_id: repo_id, id: solr_id)
else
redirect_to CONFIG[:clio_redirect_url] + params[:id].delete_prefix('ldpd_')
redirect_to (CONFIG[:clio_redirect_url] + clio_id), allow_other_host: true
end
rescue Blacklight::Exceptions::RecordNotFound
Rails.logger.warn("Record not found: #{solr_id}")
redirect_url = (CONFIG[:clio_redirect_url] + solr_id.delete_prefix('ldpd_')) if (solr_id =~ /^ldpd_\d+$/)
redirect_url = (CONFIG[:clio_redirect_url] + clio_id) if clio_id
if redirect_url
redirect_to(redirect_url, allow_other_host: true)
else
Expand Down
2 changes: 1 addition & 1 deletion app/values/aeon_local_request.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def reference_number
# but we don't currently send Barnard items to Aeon so we don't need to worry about
# handling that case. We can always assume that all relevant records have an
# extractable bibid.
match_data = @solr_document['id'].match(/ldpd_(.+)_aspace.*/);
match_data = @solr_document['id'].match(/cul-(.+)_aspace.*/);
match_data.nil? ? nil : match_data[1]
end

Expand Down
8 changes: 7 additions & 1 deletion lib/ead/traject/ead2_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,14 @@ def eadid_from_url_or_text(field_name)
ead_url = record.xpath('/ead/eadheader/eadid/@url').first
ead_id = /ldpd_(\d+)\/?/.match(ead_url.to_s)&.[](1)
end
ead_id.sub!(/^ldpd_/,'')
ead_id.gsub!(/[^A-Za-z0-9]/,'-')
if ead_id
accumulator.concat ["ldpd_#{ead_id}"]
if ead_id =~ /^\d/
accumulator.concat ["cul-#{ead_id}"]
else
accumulator.concat [ead_id]
end
else
logger.warn "no id found; skipping #{settings['command_line.filename']}"
context.skip!
Expand Down
6 changes: 3 additions & 3 deletions lib/tasks/acfa.rake
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ namespace :acfa do
solr_url = ENV.fetch('SOLR_URL', Blacklight.default_index.connection.base_uri)
ead_dir = CONFIG[:ead_cache_dir]
puts "Seeding index for #{rails_env}"
bib_pattern = /ldpd_(\d+).xml$/
bib = ENV['BIB'] ? "ldpd_#{ENV['BIB']}" : '*'
bib_pattern = /cul-(\d+).xml$/
bib = ENV['BIB'] ? "cul-#{ENV['BIB']}" : '*'
filename_pattern = ENV['PATTERN']
filename_pattern ||= (ENV['CLIO_STUBS'].to_s =~ /true/i) ? "clio_ead_ldpd_#{bib}.xml" : "as_ead_#{bib}.xml"
filename_pattern ||= (ENV['CLIO_STUBS'].to_s =~ /true/i) ? "clio_ead_cul-#{bib}.xml" : "as_ead_#{bib}.xml"
indexed = 0
glob_pattern = File.join(ead_dir, filename_pattern)
puts "Seeding index with as_ead data from #{glob_pattern}..."
Expand Down
4 changes: 2 additions & 2 deletions spec/controllers/catalog_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
describe "GET #resolve" do
let(:id) { '123456789' }
let(:bib_id) { '123456789' }
let(:solr_id) { 'ldpd_123456789' }
let(:solr_id) { 'cul-123456789' }
let(:repository_id) { 'nnc' }
let(:search_service) { instance_double(Blacklight::SearchService) }
let(:solr_doc) { {id: solr_id, repository_id_ssi: repository_id} }
Expand Down Expand Up @@ -38,7 +38,7 @@
end
end
context "id has ldpd prefix" do
let(:id) { 'ldpd_123456789' }
let(:id) { 'cul-123456789' }
it "redirects to finding aid" do
expect(controller).to receive(:redirect_to).with(finding_aid_url).and_call_original
get :resolve, params: { id: id }
Expand Down
10 changes: 9 additions & 1 deletion spec/ead/traject/ead2_config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
end
let(:index_document) { subject.map_record(record).with_indifferent_access }
describe 'eadid' do
let(:expected_value) { 'ldpd_1234567' }
let(:expected_value) { 'cul-1234567' }
context 'is given in /ead/archdesc/unitid[1]/text()' do
let(:fixture_path) { File.join(file_fixture_path, 'ead/test_eadid/from_unitid.xml') }
it { expect(index_document).not_to be_nil }
Expand All @@ -44,6 +44,14 @@
it { expect(index_document[:id]).to eql [expected_value] }
it { expect(index_document[:ead_ssi]).to eql [expected_value] }
end

context 'is a dot-delimited ID' do
let(:expected_value) { 'BC20-09' }
let(:fixture_path) { File.join(file_fixture_path, 'ead/test_eadid/from_nynybaw_ead.xml') }
it { expect(index_document).not_to be_nil }
it { expect(index_document[:id]).to eql [expected_value] }
it { expect(index_document[:ead_ssi]).to eql [expected_value] }
end
end
describe 'date_range_* indexing' do
context 'has bulk range' do
Expand Down
16 changes: 16 additions & 0 deletions spec/fixtures/ead/test_eadid/from_nynybaw_ead.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="utf-8"?>
<ead xmlns="urn:isbn:1-931666-22-9" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd"><eadheader countryencoding="iso3166-1" dateencoding="iso8601" findaidstatus="in_progress" langencoding="iso639-2b" repositoryencoding="iso15511"><eadid countrycode="US" mainagencycode="US-NyNyBAW">BC20.09</eadid><filedesc><titlestmt><titleproper>Guide to Alice Duer Miller Papers
<num>BC20.09</num></titleproper><author>Shannon O'Neill</author></titlestmt><publicationstmt><publisher>Barnard Archives and Special Collections</publisher><p id="logostmt"><extref xlink:actuate="onLoad" xlink:href="https://archives.barnard.edu/sites/default/files/archives-logo-black2_smaller.png" xlink:show="embed" xlink:type="simple"/></p><p><date>© 2015</date></p><address><addressline>3009 Broadway</addressline><addressline>New York, NY 10027</addressline><addressline>[email protected]</addressline><addressline>URL: <extptr xlink:href="http://archives.barnard.edu/" xlink:show="new" xlink:title="http://archives.barnard.edu/" xlink:type="simple"/></addressline></address></publicationstmt></filedesc><profiledesc><creation>This finding aid was produced using ArchivesSpace on <date>2024-03-01 18:18:26 -0500</date>.</creation><langusage>English</langusage><descrules>Describing Archives: A Content Standard</descrules></profiledesc></eadheader><archdesc level="collection">
<did>
<repository>
<corpname>Barnard Archives and Special Collections</corpname>
</repository>
<unittitle>Alice Duer Miller Papers</unittitle>
<origination label="Creator">
<persname rules="dacs" source="local">Miller, Alice Duer, 1874-1942</persname>
</origination>
<unitid>BC20.09</unitid>
<unitid type="aspace_uri">/repositories/2/resources/355</unitid>
</did>
</archdesc>
</ead>
2 changes: 1 addition & 1 deletion spec/models/solr_document_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require 'rails_helper'

describe SolrDocument, type: :model do
let(:id) { "ldpd_#{bibid}_aspace_abcdefabcdefabcdefabcdefabcdefab" }
let(:id) { "cul-#{bibid}_aspace_abcdefabcdefabcdefabcdefabcdefab" }
let(:bibid) { '12345678' }
let(:title_ssm) { ['Great Title'] }
let(:creator_ssim) { ['Great Author'] }
Expand Down
2 changes: 1 addition & 1 deletion spec/requests/api/v1/index/delete_ead_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
end

describe "with authentication" do
let(:bibids) { ['ldpd_7746709', 'ldpd_8972723'] }
let(:bibids) { ['cul-7746709', 'cul-8972723'] }
let(:delete_ead_job_double) { double(DeleteEadJob) }
before do
allow(DeleteEadJob).to receive(:new).and_return(delete_ead_job_double)
Expand Down
2 changes: 1 addition & 1 deletion spec/requests/api/v1/index/index_ead_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
end

describe "with authentication" do
let(:bibids) { ['ldpd_7746709', 'ldpd_8972723'] }
let(:bibids) { ['cul-7746709', 'cul-8972723'] }
let(:index_ead_job_double) { double(IndexEadJob) }
before do
allow(IndexEadJob).to receive(:new).and_return(index_ead_job_double)
Expand Down
2 changes: 1 addition & 1 deletion spec/values/aeon_local_request_spec.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
require 'rails_helper'

RSpec.describe AeonLocalRequest do
let(:id) { "ldpd_#{bibid}_aspace_abcdefabcdefabcdefabcdefabcdefab" }
let(:id) { "cul-#{bibid}_aspace_abcdefabcdefabcdefabcdefabcdefab" }
let(:bibid) { '12345678' }
let(:collection_ssim) { ['Great Collection Name'] }
let(:title_ssm) { ['Great Title'] }
Expand Down
2 changes: 1 addition & 1 deletion spec/views/catalog/_document.atom.builder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
require 'rails_helper'

RSpec.describe "catalog/_document.atom.builder", type: :view do
let(:finding_aid_id) { 'ldpd_ABCDEFG' }
let(:finding_aid_id) { 'cul-ABCDEFG' }
let(:repository_id) { 'nnc' }
let(:aspace_base_repository_id) { 'nynybaw' }
let(:arclight_params) { {_root_: finding_aid_id, repository_id_ssi: repository_id, level_ssim: ['File'], component_level_isim: [3]} }
Expand Down
2 changes: 1 addition & 1 deletion spec/views/catalog/_document.rss.builder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
require 'rails_helper'

RSpec.describe "catalog/_document.rss.builder", type: :view do
let(:finding_aid_id) { 'ldpd_ABCDEFG' }
let(:finding_aid_id) { 'cul-ABCDEFG' }
let(:repository_id) { 'nnc' }
let(:aspace_base_repository_id) { 'nynybaw' }
let(:arclight_params) { {_root_: finding_aid_id, repository_id_ssi: repository_id, level_ssim: ['File'], component_level_isim: [3]} }
Expand Down
6 changes: 3 additions & 3 deletions spec/views/catalog/index.json.builder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

RSpec.describe "catalog/index.json", type: :view do
let(:response) { instance_double(Blacklight::Solr::Response, documents: docs, prev_page: nil, next_page: 2, total_pages: 3) }
let(:finding_aid_id) { 'ldpd_ABCDEFG' }
let(:finding_aid_id) { 'cul-ABCDEFG' }
let(:repository_id) { 'nnc' }
let(:aspace_base_repository_id) { 'nynybaw' }
let(:arclight_params) { {_root_: finding_aid_id, repository_id_ssi: repository_id, level_ssim: ['File'], component_level_isim: [3]} }
Expand Down Expand Up @@ -74,12 +74,12 @@
it "serializes a local collection" do
expect(response_data).to include(
{
id: 'ldpd_ABCDEFG',
id: finding_aid_id,
type: 'Collection',
attributes: {
title: 'CUL Collection'
},
links: { self: '/archives/ldpd_ABCDEFG' }
links: { self: "/archives/#{finding_aid_id}" }
}
)
end
Expand Down
2 changes: 1 addition & 1 deletion test/fixtures/files/fa_lists/nnc-a_fa_list.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
<ul>
<li><a href="/ead/nnc-a/ldpd_8972723">Rose Associates Inc. records</a></li>
<li><a href="/ead/nnc-a/cul-8972723">Rose Associates Inc. records</a></li>
</ul>
2 changes: 1 addition & 1 deletion test/fixtures/files/fa_lists/nnc-ea_fa_list.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
<ul>
<li><a href="/ead/nnc-ea/ldpd_14362723">Guide to the Ta-Chun Hsu papers</a></li>
<li><a href="/ead/nnc-ea/cul-14362723">Guide to the Ta-Chun Hsu papers</a></li>
</ul>
6 changes: 3 additions & 3 deletions test/fixtures/files/fa_lists/nnc-rb_fa_list.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<ul>
<li><a href="/ead/nnc-rb/ldpd_13202800">Pamela Moore papers</a></li>
<li><a href="/ead/nnc-rb/ldpd_14439340">Gail Mary Killian and Stephen Desroches sound recordings</a></li>
<li><a href="/ead/nnc-rb/ldpd_6948211">TRIGA reactor records</a></li>
<li><a href="/ead/nnc-rb/cul-13202800">Pamela Moore papers</a></li>
<li><a href="/ead/nnc-rb/cul-14439340">Gail Mary Killian and Stephen Desroches sound recordings</a></li>
<li><a href="/ead/nnc-rb/cul-6948211">TRIGA reactor records</a></li>
</ul>
2 changes: 1 addition & 1 deletion test/fixtures/files/fa_lists/nnc-ut_fa_list.html
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
<ul>
<li><a href="/ead/nnc-ut/ldpd_16607351">Henry Pelham Burn letters on the Moral Re-Armament movement</a></li>
<li><a href="/ead/nnc-ut/cul-16607351">Henry Pelham Burn letters on the Moral Re-Armament movement</a></li>
</ul>

0 comments on commit cb1c7f9

Please sign in to comment.