Skip to content

Commit

Permalink
Merge pull request #239 from scientist-softserv/i233-rework_relations…
Browse files Browse the repository at this point in the history
…hips_job

Rework relationships job
  • Loading branch information
laritakr authored May 17, 2023
2 parents 124cc4d + 2baac73 commit 8fdf56e
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 41 deletions.
1 change: 1 addition & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ Metrics/MethodLength:
- 'lib/generators/iiif_print/catalog_controller_generator.rb'
- 'lib/iiif_print/ingest/ndnp/ndnp_mets_helper.rb'
- 'lib/iiif_print/ingest/pdf_issue_ingester.rb'
- 'lib/iiif_print/jobs/create_relationships_job.rb'
- 'spec/model_shared.rb'

Naming/PredicateName:
Expand Down
1 change: 0 additions & 1 deletion lib/iiif_print.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
require "iiif_print/jobs/application_job"
require "iiif_print/blacklight_iiif_search/annotation_decorator"
require "iiif_print/jobs/child_works_from_pdf_job"
require "iiif_print/jobs/create_relationships_job"
require "iiif_print/split_pdfs/base_splitter"
require "iiif_print/split_pdfs/child_work_creation_from_pdf_service"

Expand Down
1 change: 1 addition & 0 deletions lib/iiif_print/engine.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class Engine < ::Rails::Engine

# rubocop:disable Metrics/BlockLength
config.to_prepare do
require "iiif_print/jobs/create_relationships_job"
# We don't have a hard requirement of Bullkrax but in our experience, lingering on earlier
# versions can introduce bugs of both Bulkrax and some of the assumptions that we've resolved.
# Very early versions of Bulkrax do not have VERSION defined
Expand Down
104 changes: 73 additions & 31 deletions lib/iiif_print/jobs/create_relationships_job.rb
Original file line number Diff line number Diff line change
@@ -1,41 +1,60 @@
module IiifPrint
module Jobs
# Break a pdf into individual pages
# Link newly created child works to the parent
class CreateRelationshipsJob < IiifPrint::Jobs::ApplicationJob
# Link newly created child works to the parent
# @param user: [User] user
include Hyrax::Lockable

RETRY_MAX = 10

# @param parent_id: [<String>] parent work id
# @param parent_model: [<String>] parent model
# @param child_model: [<String>] child model
def perform(user:, parent_id:, parent_model:, child_model:)
if completed_child_data_for(parent_id, child_model)
# @param retries: [<Integer>] count used during rescheduling to prevent infinite retries
def perform(parent_id:, parent_model:, child_model:, retries: 0, **)
@parent_id = parent_id
@parent_model = parent_model
@child_model = child_model
@retries = retries + 1

@number_of_successes = 0
@number_of_failures = 0
@parent_record_members_added = false
@errors = []

# Because we need our children in the correct order, we can't create any
# relationships until all child works have been created.
if completed_child_data
# add the members
parent_work = parent_model.constantize.find(parent_id)
create_relationships(user: user, parent: parent_work, ordered_children: @child_works)
@pending_children.each(&:destroy)
add_children_to_parent
if @number_of_failures.zero? && @number_of_successes == @pending_children.count
# remove pending relationships upon valid completion
@pending_children.each(&:destroy)
else
raise "CreateRelationshipsJob failed for parent id: #{@parent_id} " \
"had #{@number_of_successes} successes & #{@number_of_failures} failures, " \
"with errors: #{@errors}"
end
else
# reschedule the job and end this one normally
#
# TODO: Depending on how things shake out, we could be infinitely rescheduling this job.
# Consider a time to live parameter.
reschedule(user: user, parent_id: parent_id, parent_model: parent_model, child_model: child_model)
# if we aren't ready yet, reschedule the job and end this one normally
reschedule_job
end
end

private

# load @child_works, and return true or false
def completed_child_data_for(parent_id, child_model)
# load @child_works and @pending children, and
# return boolean indicating whether all chilren are present
def completed_child_data
@child_works = []
found_all_children = true

# find and sequence all pending children
@pending_children = IiifPrint::PendingRelationship.where(parent_id: parent_id).order('child_order asc')
@pending_children = IiifPrint::PendingRelationship.where(parent_id: @parent_id).order('child_order asc')

# find child works (skip out if any haven't yet been created)
@pending_children.each do |child|
# find by title... if any aren't found, the child works are not yet ready
found_children = find_children_by_title_for(child.child_title, child_model)
found_children = find_children_by_title_for(child.child_title, @child_model)
found_all_children = false if found_children.empty?
break unless found_all_children == true
@child_works += found_children
Expand All @@ -49,30 +68,53 @@ def find_children_by_title_for(title, model)
model.constantize.where(title: title)
end

def reschedule(user:, parent_id:, parent_model:, child_model:)
def add_children_to_parent
parent_work = @parent_model.constantize.find(@parent_id)
create_relationships(parent: parent_work, ordered_children: @child_works)
end

def reschedule_job
return if @retries > RETRY_MAX
CreateRelationshipsJob.set(wait: 10.minutes).perform_later(
user: user,
parent_id: parent_id,
parent_model: parent_model,
child_model: child_model
parent_id: @parent_id,
parent_model: @parent_model,
child_model: @child_model,
retries: @retries
)
end

def create_relationships(user:, parent:, ordered_children:)
records_hash = {}
ordered_children.map(&:id).each_with_index do |child_id, i|
records_hash[i.to_s] = { id: child_id }
def create_relationships(parent:, ordered_children:)
acquire_lock_for(parent.id) do
# Not sure uncached is needed here, but kept
# for consistency with Bulkrax's relationships logic
ActiveRecord::Base.uncached do
ordered_children.each do |child|
add_to_work(child_record: child, parent_record: parent)
@number_of_successes += 1
rescue => e
@number_of_failures += 1
@errors << e
end
end
parent.save! if @parent_record_members_added && @number_of_failures.zero?
end
attrs = { work_members_attributes: records_hash }
parent.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX)
env = Hyrax::Actors::Environment.new(parent, Ability.new(user), attrs)

raise env unless Hyrax::CurationConcern.actor.update(env)
# need to reindex all file_sets to make all ancestors are indexed
# Bulkrax no longer reindexes file_sets, but IiifPrint needs both to add is_page_of_ssim for universal viewer.
# This is where child works need to be indexed (AFTER the parent save), as opposed to how Bulkrax does it.
ordered_children.each do |child_work|
child_work.update_index
child_work.file_sets.each(&:update_index) if child_work.respond_to?(:file_sets)
end
end

def add_to_work(child_record:, parent_record:)
return true if parent_record.ordered_members.to_a.include?(child_record)

parent_record.ordered_members << child_record
@parent_record_members_added = true
# Bulkrax does child_record.save! here, but it makes no sense
# as there is nothing to save or index at this point.
end
end
end
end
100 changes: 91 additions & 9 deletions spec/iiif_print/jobs/create_relationships_job_spec.rb
Original file line number Diff line number Diff line change
@@ -1,17 +1,99 @@
# frozen_string_literal: true

require 'spec_helper'
require 'misc_shared'

RSpec.describe IiifPrint::Jobs::CreateRelationshipsJob do
# TODO: add specs
let(:parent) { WorkWithIiifPrintConfig.new(title: ['required title']) }
let(:my_user) { build(:user) }
let(:parent_model) { WorkWithIiifPrintConfig }
let(:child_model) { WorkWithIiifPrintConfig }
module IiifPrint::Jobs
RSpec.describe CreateRelationshipsJob, type: :job do
let(:create_relationships_job) { described_class.new }

let(:parent_model) { WorkWithIiifPrintConfig.to_s }
let(:child_model) { WorkWithIiifPrintConfig.to_s }
let(:parent_record) { WorkWithIiifPrintConfig.new(title: ['required title']) }
let(:child_record1) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 01"]) }
let(:child_record2) { WorkWithIiifPrintConfig.new(title: ["Child of #{parent_record.id} page 02"]) }
let(:pending_rel1) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 01", child_order: "Child of #{parent_record.id} page 01") }
let(:pending_rel2) { IiifPrint::PendingRelationship.new(parent_id: parent_record.id, child_title: "Child of #{parent_record.id} page 02", child_order: "Child of #{parent_record.id} page 02") }

describe '#perform' do
before do
allow(create_relationships_job).to receive(:acquire_lock_for).and_yield
allow(create_relationships_job).to receive(:reschedule_job)
allow(parent_record).to receive(:save!)

parent_record.save
pending_rel1.save
pending_rel2.save
end

subject(:perform) do
create_relationships_job.perform(
parent_id: parent_record.id,
parent_model: parent_model,
child_model: child_model,
retries: 0
)
end

context 'when adding a child work to a parent work' do
before do
child_record1.save
child_record2.save
end

it 'assigns the child to the parent\'s #ordered_members' do
perform
expect(parent_record.reload.ordered_member_ids).to eq([child_record1.id, child_record2.id])
end

it 'deletes the pending relationships' do
expect { perform }.to change(IiifPrint::PendingRelationship, :count).by(-2)
end

it 'does not reschedule the job' do
perform
expect(create_relationships_job).not_to have_received(:reschedule_job)
end
end

context 'when a relationship fails' do
before do
child_record1.save
child_record2.save
end

before do
expect_any_instance_of(CreateRelationshipsJob).to receive(:add_to_work).and_raise('error')
end

it 'does not save the parent' do
expect { perform }.to raise_error(RuntimeError)
expect(parent_record).not_to have_received(:save!)
end

it 'does not delete the pending relationships' do
expect { perform }.to raise_error(RuntimeError)
expect(IiifPrint::PendingRelationship.where(parent_id: parent_record.id).count).to eq(2)
end
end

context 'when any child record is not found' do
let(:child_record2) { nil }

before do
child_record1.save
end

let(:subject) { described_class.perform(user: my_user, parent_id: parent.id, parent_model: parent_model, child_model: child_model) }
it 'does not save the parent' do
perform
expect(parent_record).not_to have_received(:save!)
end

describe '#perform' do
xit 'loads all child work ids into ordered_members' do
it 'reschedules the job' do
perform
expect(create_relationships_job).to have_received(:reschedule_job)
end
end
end
end
end

0 comments on commit 8fdf56e

Please sign in to comment.