From 44dbb6a002a4e4075a025e492f6a3fffe982c308 Mon Sep 17 00:00:00 2001 From: jambun Date: Tue, 27 Jun 2017 13:53:20 +1000 Subject: [PATCH 1/5] WIP: Adding support for generating handles on export Currently lacks: - Any sort of error handling - An actual handle server --- backend/model/resource_update_monitor.rb | 13 +++++----- exporter_app/config/config.rb | 8 ++++++ exporter_app/config/jobs.rb | 2 ++ exporter_app/tasks/export_ead_task.rb | 25 +++++++++++++++++++ .../tasks/lib/archivesspace_client.rb | 17 ++++++++++--- exporter_app/tasks/lib/handle_client.rb | 18 +++++++++++++ exporter_app/tasks/lib/sqlite_work_queue.rb | 1 + 7 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 exporter_app/tasks/lib/handle_client.rb diff --git a/backend/model/resource_update_monitor.rb b/backend/model/resource_update_monitor.rb index 19f1d94..0c9e5be 100644 --- a/backend/model/resource_update_monitor.rb +++ b/backend/model/resource_update_monitor.rb @@ -3,18 +3,18 @@ class ResourceUpdateMonitor CHANGED_RECORD_QUERIES = { :updated_resources => - ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.repo_id, r.publish, r.suppressed' + + ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.ead_location, r.repo_id, r.publish, r.suppressed' + ' from resource r' + ' where system_mtime >= ?'), :updated_archival_objects => - ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.repo_id, r.publish, r.suppressed ' + + ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.ead_location, r.repo_id, r.publish, r.suppressed ' + ' from resource r' + ' inner join archival_object ao on ao.root_record_id = r.id' + ' where ao.system_mtime >= ?'), :updated_digital_object_via_resource => - ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.repo_id, r.publish, r.suppressed' + + ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.ead_location, r.repo_id, r.publish, r.suppressed' + ' from digital_object do' + ' inner join instance_do_link_rlshp rlshp on 
rlshp.digital_object_id = do.id' + ' inner join instance i on i.id = rlshp.instance_id' + @@ -22,7 +22,7 @@ class ResourceUpdateMonitor ' where do.system_mtime >= ?'), :updated_digital_object_via_ao => - ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.repo_id, r.publish, r.suppressed from digital_object do' + + ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.ead_location, r.repo_id, r.publish, r.suppressed from digital_object do' + ' inner join instance_do_link_rlshp rlshp on rlshp.digital_object_id = do.id' + ' inner join instance i on i.id = rlshp.instance_id' + ' inner join archival_object ao on ao.id = i.archival_object_id' + @@ -30,7 +30,7 @@ class ResourceUpdateMonitor ' where do.system_mtime >= ?'), :updated_digital_object_component_via_resource => - ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.repo_id, r.publish, r.suppressed from digital_object_component doc' + + ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.ead_location, r.repo_id, r.publish, r.suppressed from digital_object_component doc' + ' inner join digital_object do on doc.root_record_id = do.id' + ' inner join instance_do_link_rlshp rlshp on rlshp.digital_object_id = do.id' + ' inner join instance i on i.id = rlshp.instance_id' + @@ -38,7 +38,7 @@ class ResourceUpdateMonitor ' where doc.system_mtime >= ?'), :updated_digital_object_component_via_ao => - ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.repo_id, r.publish, r.suppressed from digital_object_component doc' + + ('select DISTINCT r.id, r.title, r.identifier, r.ead_id, r.ead_location, r.repo_id, r.publish, r.suppressed from digital_object_component doc' + ' inner join digital_object do on doc.root_record_id = do.id' + ' inner join instance_do_link_rlshp rlshp on rlshp.digital_object_id = do.id' + ' inner join instance i on i.id = rlshp.instance_id' + @@ -133,6 +133,7 @@ def updates_since(timestamp) 'id' => res[:id], 'title' => res[:title], 'ead_id' => res[:ead_id], + 'ead_location' 
=> res[:ead_location], 'identifier' => JSON.parse(res[:identifier]), 'repo_id' => res[:repo_id], 'uri' => JSONModel(:resource).uri_for(res[:id], :repo_id => res[:repo_id]), diff --git a/exporter_app/config/config.rb b/exporter_app/config/config.rb index ad6bffd..723d0ae 100644 --- a/exporter_app/config/config.rb +++ b/exporter_app/config/config.rb @@ -2,5 +2,13 @@ aspace_username: 'admin', aspace_password: 'admin', aspace_backend_url: 'http://localhost:4567/', + + handle_wsdl_url: 'http://link.its.yale.edu/ypls-ws/PersistentLinking?wsdl', + handle_user: '10079/FA', + handle_credential: '[YOUR CREDENTIAL]', + handle_prefix: '10079/fa', + handle_group: '10079/FA', + handle_base: 'http://findit.library.yale.edu/catalog', + log_level: 'debug' } diff --git a/exporter_app/config/jobs.rb b/exporter_app/config/jobs.rb index cabf5be..fcaf02e 100644 --- a/exporter_app/config/jobs.rb +++ b/exporter_app/config/jobs.rb @@ -23,6 +23,8 @@ :numbered_cs => false }, + :generate_handles => true, + :xslt_transforms => ['config/transform.xslt'], :validation_schema => ['config/ead.xsd'], :schematron_checks => ['config/schematron.sch'], diff --git a/exporter_app/tasks/export_ead_task.rb b/exporter_app/tasks/export_ead_task.rb index b53f5a4..0097dd9 100644 --- a/exporter_app/tasks/export_ead_task.rb +++ b/exporter_app/tasks/export_ead_task.rb @@ -7,6 +7,7 @@ require_relative 'lib/xslt_processor' require_relative 'lib/sqlite_work_queue' require_relative 'lib/archivesspace_client' +require_relative 'lib/handle_client' require_relative 'lib/validation_failed_exception' class ExportEADTask < TaskInterface @@ -31,6 +32,15 @@ def initialize(task_params, job_identifier, workspace_base) config = ExporterApp.config @as_client = ArchivesSpaceClient.new(config[:aspace_backend_url], config[:aspace_username], config[:aspace_password]) + if (@generate_handles = task_params.fetch(:generate_handles, false)) + @handle_client = HandleClient.new(config[:handle_wsdl_url], + config[:handle_user], + 
config[:handle_credential], + config[:handle_prefix], + config[:handle_group], + config[:handle_base]) + end + @commit_every_n_records = task_params.fetch(:commit_every_n_records, nil) @records_added = 0 @@ -66,6 +76,7 @@ def call(process) while (still_running = process.running?) && !max_records_hit? && (item = @work_queue.next) if item[:action] == 'add' begin + ensure_handle(item) if @generate_handles download_ead(item) create_manifest_json(item) rescue SkipRecordException @@ -152,6 +163,20 @@ def path_for_export_file(basename, extension = 'xml') File.join(output_directory, "#{basename}.#{extension}") end + def ensure_handle(item) + @log.info("Ensuring there is a handle for #{item[:uri]}") + @log.debug("ead_id: '#{item[:ead_id]}', ead_location: '#{item[:ead_location]}'") + + if !item[:ead_location] && item[:ead_id] + handle = @handle_client.create_handle(item[:ead_id]) + @log.debug("Created handle: #{handle}") + response = @as_client.update_record(item[:uri], 'ead_location' => handle) + @log.debug("Updated resource: #{response}") + else + @log.debug("No need to create handle") + end + end + def download_ead(item) @log.info("Downloading EAD for #{item[:uri]}") id = item.fetch(:resource_id) diff --git a/exporter_app/tasks/lib/archivesspace_client.rb b/exporter_app/tasks/lib/archivesspace_client.rb index 90871fc..615f903 100644 --- a/exporter_app/tasks/lib/archivesspace_client.rb +++ b/exporter_app/tasks/lib/archivesspace_client.rb @@ -21,17 +21,28 @@ def export(id, repo_id, opts = {}) get("/repositories/#{repo_id}/resource_descriptions/#{id}.xml", opts) end + def update_record(uri, hash) + json_post(uri, json_get(uri).merge(hash), true) + end + private def login json_post("/users/#{@username}/login", :password => @password, :expiring => false)['session'] end - def json_post(path, params) + def json_post(path, params, body = false) uri = URI.join(@aspace_backend_url, @aspace_backend_path, path.gsub(/^\//,"")) request = Net::HTTP::Post.new(uri) - request.form_data = 
params + request['X-ArchivesSpace-Session'] = @session if @session + + if body + request['Content-Type'] = 'text/json' + request.body = JSON.generate(params) + else + request.form_data = params + end http = Net::HTTP.new(uri.host, uri.port) @@ -75,7 +86,7 @@ def get(path, params) response.body end - def json_get(uri, params) + def json_get(uri, params = {}) JSON(get(uri, params)) end end diff --git a/exporter_app/tasks/lib/handle_client.rb b/exporter_app/tasks/lib/handle_client.rb new file mode 100644 index 0000000..5babca7 --- /dev/null +++ b/exporter_app/tasks/lib/handle_client.rb @@ -0,0 +1,18 @@ +require 'net/http' +require 'net/https' + +class HandleClient + + def initialize(wsdl_url, user, credential, prefix, group, handle_base = 'http://hdl.handle.net') + @wsdl_url = wsdl_url + @user = user + @credential = credential + @prefix = prefix + @group = group + @handle_base = handle_base + end + + def create_handle(id) + [@handle_base, @prefix, id].join('/') + end +end diff --git a/exporter_app/tasks/lib/sqlite_work_queue.rb b/exporter_app/tasks/lib/sqlite_work_queue.rb index f536188..6f56de6 100644 --- a/exporter_app/tasks/lib/sqlite_work_queue.rb +++ b/exporter_app/tasks/lib/sqlite_work_queue.rb @@ -7,6 +7,7 @@ class SQLiteWorkQueue {:name => 'title', :sqltype => 'text', :jdbctype => 'string'}, {:name => 'uri', :sqltype => 'text', :jdbctype => 'string'}, {:name => 'ead_id', :sqltype => 'text', :jdbctype => 'string'}, + {:name => 'ead_location', :sqltype => 'text', :jdbctype => 'string'}, ] def initialize(db_file) From 153b04752a08a5ec94996ee68692eab39cb41145 Mon Sep 17 00:00:00 2001 From: jambun Date: Wed, 28 Jun 2017 15:52:00 +1000 Subject: [PATCH 2/5] add support for gems in exporter_app --- exporter_app/Gemfile | 3 +++ exporter_app/Gemfile.lock | 40 +++++++++++++++++++++++++++++++++++ exporter_app/bin/bootstrap.sh | 12 +++++++++++ exporter_app/bin/startup.sh | 3 +++ 4 files changed, 58 insertions(+) create mode 100644 exporter_app/Gemfile create mode 100644 
exporter_app/Gemfile.lock create mode 100755 exporter_app/bin/bootstrap.sh diff --git a/exporter_app/Gemfile b/exporter_app/Gemfile new file mode 100644 index 0000000..c6297b1 --- /dev/null +++ b/exporter_app/Gemfile @@ -0,0 +1,3 @@ +source 'http://rubygems.org' + +gem 'savon', '~> 2.11', '>= 2.11.1' diff --git a/exporter_app/Gemfile.lock b/exporter_app/Gemfile.lock new file mode 100644 index 0000000..f062cbf --- /dev/null +++ b/exporter_app/Gemfile.lock @@ -0,0 +1,40 @@ +GEM + remote: http://rubygems.org/ + specs: + akami (1.3.1) + gyoku (>= 0.4.0) + nokogiri + builder (3.2.3) + gyoku (1.3.1) + builder (>= 2.1.2) + httpi (2.4.2) + rack + socksify + mini_portile2 (2.1.0) + nokogiri (1.6.8.1) + mini_portile2 (~> 2.1.0) + nokogiri (1.6.8.1-java) + nori (2.6.0) + rack (1.6.8) + savon (2.11.1) + akami (~> 1.2) + builder (>= 2.1.2) + gyoku (~> 1.2) + httpi (~> 2.3) + nokogiri (>= 1.4.0) + nori (~> 2.4) + wasabi (~> 3.4) + socksify (1.7.1) + wasabi (3.5.0) + httpi (~> 2.0) + nokogiri (>= 1.4.2) + +PLATFORMS + java + ruby + +DEPENDENCIES + savon (~> 2.11, >= 2.11.1) + +BUNDLED WITH + 1.15.1 diff --git a/exporter_app/bin/bootstrap.sh b/exporter_app/bin/bootstrap.sh new file mode 100755 index 0000000..c6c6779 --- /dev/null +++ b/exporter_app/bin/bootstrap.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +BASEDIR=$(dirname "$0")/../ + +export GEM_HOME="$BASEDIR/gems" +export GEM_PATH="$BASEDIR/gems" + +cd "$BASEDIR" + +java -cp bin/jruby-complete-9.1.0.0.jar org.jruby.Main -S gem install bundler + +java -cp bin/jruby-complete-9.1.0.0.jar org.jruby.Main gems/bin/bundle install diff --git a/exporter_app/bin/startup.sh b/exporter_app/bin/startup.sh index 65b091d..3425e2c 100755 --- a/exporter_app/bin/startup.sh +++ b/exporter_app/bin/startup.sh @@ -2,6 +2,9 @@ BASEDIR=$(dirname "$0")/../ +export GEM_HOME="$BASEDIR/gems" +export GEM_PATH="$BASEDIR/gems" + cd "$BASEDIR" mkdir -p logs exec java $JAVA_OPTS -Darchivesspace-exporter=yes -Dfile.encoding=UTF-8 -cp "bin/*:java_lib/*:$CLASSPATH" 
org.jruby.Main -- exporter_app.rb 2>logs/exporter_app.err From b4666fd42b422de22bec4adb81994d65cae3ebca Mon Sep 17 00:00:00 2001 From: jambun Date: Wed, 28 Jun 2017 15:55:43 +1000 Subject: [PATCH 3/5] WIP: talk to handle soap api --- exporter_app/config/config.rb | 8 ++-- exporter_app/exporter_app.rb | 3 ++ exporter_app/tasks/export_ead_task.rb | 2 +- exporter_app/tasks/lib/handle_client.rb | 55 ++++++++++++++++++++++--- 4 files changed, 58 insertions(+), 10 deletions(-) diff --git a/exporter_app/config/config.rb b/exporter_app/config/config.rb index 723d0ae..58100f5 100644 --- a/exporter_app/config/config.rb +++ b/exporter_app/config/config.rb @@ -4,11 +4,11 @@ aspace_backend_url: 'http://localhost:4567/', handle_wsdl_url: 'http://link.its.yale.edu/ypls-ws/PersistentLinking?wsdl', - handle_user: '10079/FA', + handle_user: '10079.1/FA', handle_credential: '[YOUR CREDENTIAL]', - handle_prefix: '10079/fa', - handle_group: '10079/FA', - handle_base: 'http://findit.library.yale.edu/catalog', + handle_prefix: '10079.1/fa', + handle_group: '10079.1/FA', + handle_base: 'http://archives.yale.edu', log_level: 'debug' } diff --git a/exporter_app/exporter_app.rb b/exporter_app/exporter_app.rb index 75ccf1c..d5dbc3a 100644 --- a/exporter_app/exporter_app.rb +++ b/exporter_app/exporter_app.rb @@ -1,3 +1,6 @@ +require 'bundler/setup' +Bundler.require + class ExporterApp POLL_INTERVAL = 60 diff --git a/exporter_app/tasks/export_ead_task.rb b/exporter_app/tasks/export_ead_task.rb index 0097dd9..32991b3 100644 --- a/exporter_app/tasks/export_ead_task.rb +++ b/exporter_app/tasks/export_ead_task.rb @@ -168,7 +168,7 @@ def ensure_handle(item) @log.debug("ead_id: '#{item[:ead_id]}', ead_location: '#{item[:ead_location]}'") if !item[:ead_location] && item[:ead_id] - handle = @handle_client.create_handle(item[:ead_id]) + handle = @handle_client.create_handle(item[:ead_id], item[:uri]) @log.debug("Created handle: #{handle}") response = @as_client.update_record(item[:uri], 'ead_location' 
=> handle) @log.debug("Updated resource: #{response}") diff --git a/exporter_app/tasks/lib/handle_client.rb b/exporter_app/tasks/lib/handle_client.rb index 5babca7..10669c0 100644 --- a/exporter_app/tasks/lib/handle_client.rb +++ b/exporter_app/tasks/lib/handle_client.rb @@ -1,18 +1,63 @@ -require 'net/http' -require 'net/https' +require 'savon' class HandleClient - def initialize(wsdl_url, user, credential, prefix, group, handle_base = 'http://hdl.handle.net') + HANDLE_HOST = 'http://hdl.handle.net' + + def initialize(wsdl_url, user, credential, prefix, group, handle_base) @wsdl_url = wsdl_url @user = user @credential = credential @prefix = prefix @group = group @handle_base = handle_base + + # looks like the namespace stuff from the example code isn't required + # leaving it commented for now in case it is needed later + # @namespace = @prefix.sub(/.*?\//, '') + ':' + + @client = Savon.client(wsdl: @wsdl_url) + end + + def create_handle(id, uri) + # unless id.include?(@namespace) + # raise "Handle prefix namespace '#{@namespace}' doesn't match namespace of id '#{id}'" + # end + # handle = [@prefix, id.sub(@namespace, '')].join('/') + + handle = [@prefix, id].join('/') + + response = @client.call(:create_batch_semantic, xml: soap_envelope(handle, uri)) + + unless response.success? 
+ raise "Failed to create handle for id '#{id}' with uri '#{uri}': #{response.to_xml.to_s}" + end + + [HANDLE_HOST, handle].join('/') end - def create_handle(id) - [@handle_base, @prefix, id].join('/') + private + + def soap_envelope(handle, uri) + <<-EOT + + + + + + + #{handle} + #{@handle_base}#{uri} + + + + #{@group} + #{@user} + #{@credential} + + + +EOT end + end From 8e60518fb7d1adb8337a72e907446790bfbdd8b0 Mon Sep 17 00:00:00 2001 From: jambun Date: Thu, 29 Jun 2017 11:04:22 +1000 Subject: [PATCH 4/5] updated readme --- README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/README.md b/README.md index c7e5252..7e7fa77 100644 --- a/README.md +++ b/README.md @@ -61,6 +61,22 @@ And shut it down like this: $ cd /path/to/archivesspace_export_service/exporter_app $ bin/shutdown.sh +The exporter application now uses gems. If running from source +you will need to download the required gems like this: + + $ cd /path/to/archivesspace_export_service/exporter_app + $ bin/bootstrap.sh + +This step is not required if running from a distributed release. + +UPGRADE NOTE: If upgrading from v1.0, you will need to remove +the ead export work queue database before starting the application, +like this: + + $ cd /path/to/archivesspace_export_service/exporter_app + $ rm workspace/ead/db/ead_export.sqlite + + See below for configuration options. @@ -87,6 +103,24 @@ frontend web UI. If the Exporter Application is deployed on a different machine from ArchivesSpace you may need to configure your firewall to open the backend port.
+If you intend to use the Handle creation feature, you will also +need to set some handle related configuration options, like this: + + { + handle_wsdl_url: 'http://link.its.yale.edu/ypls-ws/PersistentLinking?wsdl', + handle_user: '10079.1/FA', + handle_credential: '[YOUR CREDENTIAL]', + handle_prefix: '10079.1/fa', + handle_group: '10079.1/FA', + handle_base: 'http://archives.yale.edu', + } + +And if using Handles, the configured ArchivesSpace user (`a_user` above) +will need permissions to update resources on any ArchivesSpace repository +from which resources will be exported, in addition to the permission +discussed above. This is because generated Handles are written back to +the resource (in the ead_location field). + ## How it works @@ -235,6 +269,12 @@ of `:task_parameters`. These are as follows: * `:numbered_cs` - Use numbered c tags in ead (default: false) + * `:generate_handles` - If set to `true` then a Handle will be created + immediately before export for any resources that have a value in + `ead_id` but do not have a value in `ead_location`. The created + Handle will be written back to the resource in the `ead_location` + field. + The sample `jobs.rb` file shows a fully configured ExportEADTask which makes use of `:after_hooks` (described below) to additionally produce PDF versions of finding aids and a table of contents. 
Note that the From 2534ab545b5c8220ba5cbc471106749541275d08 Mon Sep 17 00:00:00 2001 From: jambun Date: Thu, 29 Jun 2017 13:58:31 +1000 Subject: [PATCH 5/5] escape values added to soap envelope --- exporter_app/tasks/lib/handle_client.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/exporter_app/tasks/lib/handle_client.rb b/exporter_app/tasks/lib/handle_client.rb index 10669c0..0aa482a 100644 --- a/exporter_app/tasks/lib/handle_client.rb +++ b/exporter_app/tasks/lib/handle_client.rb @@ -46,14 +46,14 @@ def soap_envelope(handle, uri) - #{handle} - #{@handle_base}#{uri} + #{handle.encode(:xml => :text)} + #{@handle_base.encode(:xml => :text)}#{uri.encode(:xml => :text)} - #{@group} - #{@user} - #{@credential} + #{@group.encode(:xml => :text)} + #{@user.encode(:xml => :text)} + #{@credential.encode(:xml => :text)}