Skip to content

Commit

Permalink
🎁 Add derivative_rodeo_splitter
Browse files Browse the repository at this point in the history
Add a new PDF splitter option that wraps DerivativeRodeo's PdfSplitGenerator.

ref #220
  • Loading branch information
laritakr committed Jun 1, 2023
1 parent 6ce7d88 commit b969541
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 6 deletions.
1 change: 1 addition & 0 deletions lib/iiif_print.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
require "iiif_print/jobs/child_works_from_pdf_job"
require "iiif_print/split_pdfs/base_splitter"
require "iiif_print/split_pdfs/child_work_creation_from_pdf_service"
require "iiif_print/split_pdfs/derivative_rodeo_splitter"

module IiifPrint
extend ActiveSupport::Autoload
Expand Down
12 changes: 6 additions & 6 deletions lib/iiif_print/jobs/child_works_from_pdf_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,19 @@ def perform(candidate_for_parency, pdf_paths, user, admin_set_id, *)
# We know that we have cases where parent_work is nil, this will definitely raise an
# exception; which is fine because we were going to do it later anyway.
@parent_work = if candidate_for_parency.work?
pdf_file_set = nil
candidate_for_parency
else
# We likely have a file set
pdf_file_set = candidate_for_parency
IiifPrint.parent_for(candidate_for_parency)
end
@child_admin_set_id = admin_set_id
child_model = @parent_work.iiif_print_config.pdf_split_child_model

# handle each input pdf
# handle each input pdf (when input is a file set, we will only have one).
pdf_paths.each do |original_pdf_path|
split_pdf(original_pdf_path, user, child_model)
split_pdf(original_pdf_path, user, child_model, pdf_file_set)
end

# Link newly created child works to the parent
Expand All @@ -47,10 +49,8 @@ def perform(candidate_for_parency, pdf_paths, user, admin_set_id, *)
private

# rubocop:disable Metrics/ParameterLists
def split_pdf(original_pdf_path, user, child_model)
# TODO: This is the place to change out the existing service and instead use the derivative
# rodeo; we will likely need to look at method signatures to tighten this interface.
image_files = @parent_work.iiif_print_config.pdf_splitter_service.call(original_pdf_path)
def split_pdf(original_pdf_path, user, child_model, pdf_file_set)
image_files = @parent_work.iiif_print_config.pdf_splitter_service.call(original_pdf_path, file_set: pdf_file_set)
return if image_files.blank?

prepare_import_data(original_pdf_path, image_files, user)
Expand Down
42 changes: 42 additions & 0 deletions lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
module IiifPrint
  module SplitPdfs
    ##
    # Thin adapter around DerivativeRodeo::Generators::PdfSplitGenerator. Per the original
    # author's notes, the generator is expected to find preprocessed images, or split the PDF
    # when no preprocessed images exist.
    #
    # TODO: override output extension from default "tiff" in Derivative Rodeo
    # TODO: define output_location_template & preprocessed_location_template
    class DerivativeRodeoSplitter
      class << self
        ##
        # Entry point sharing the call signature of the other PDF splitter services.
        #
        # @param _path [String] unused here, kept for a consistent splitter method signature
        # @param file_set [FileSet] file set containing a PDF file to split
        # @return [Array] whatever the generator reports as generated files; documented by the
        #   original author as paths to the images split from each page of the PDF
        def call(_path, file_set:)
          splitter = new(file_set: file_set)
          splitter.split_files
        end
      end

      ##
      # @param file_set [FileSet] file set whose input URI is resolved via
      #   IiifPrint::DerivativeRodeoService.derivative_rodeo_input_uri
      def initialize(file_set:)
        @path = IiifPrint::DerivativeRodeoService.derivative_rodeo_input_uri(file_set: file_set)
      end

      ##
      # Builds a PdfSplitGenerator for the resolved input URI and asks it for its generated files.
      #
      # @return [Object] the value of PdfSplitGenerator#generated_files
      def split_files
        generator = DerivativeRodeo::Generators::PdfSplitGenerator.new(**generator_arguments)
        generator.generated_files
      end

      private

      # Keyword arguments handed to the PdfSplitGenerator constructor.
      def generator_arguments
        {
          input_uris: @path,
          output_location_template: template,
          preprocessed_location_template: location
        }
      end

      # Placeholder output location template; see the class-level TODO.
      def template
        'who knows'
      end

      # Placeholder preprocessed location template; see the class-level TODO.
      def location
        'who knows'
      end
    end
  end
end
33 changes: 33 additions & 0 deletions spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# frozen_string_literal: true

require 'spec_helper'

# Specs for the splitter that delegates PDF splitting to the DerivativeRodeo's PdfSplitGenerator.
RSpec.describe IiifPrint::SplitPdfs::DerivativeRodeoSplitter do
  let(:path) { nil }
  let(:work) { double(MyWork, aark_id: '12345') }
  let(:file_set) { FileSet.new.tap { |fs| fs.save!(validate: false) } }
  # Stand-in for the generator instance. A message expectation on `.new` with no configured
  # return value yields nil, and the splitter would then call `generated_files` on nil.
  let(:generated_files) { [] }
  let(:generator) { double(DerivativeRodeo::Generators::PdfSplitGenerator, generated_files: generated_files) }

  describe 'class' do
    subject { described_class }

    it { is_expected.to respond_to(:call) }
  end

  describe "instance" do
    subject { described_class.new(file_set: file_set) }

    before do
      allow(file_set).to receive(:parent).and_return(work)
      # TODO: This is a hack that leverages the internals of Hydra::Works; not excited about it but
      # this part is only one piece of the over all integration.
      allow(file_set).to receive(:original_file).and_return(double(original_filename: __FILE__))
    end

    it { is_expected.to respond_to :split_files }

    it 'uses the rodeo to split' do
      # Return the stand-in generator so the `.generated_files` call in the splitter succeeds.
      expect(DerivativeRodeo::Generators::PdfSplitGenerator).to receive(:new).and_return(generator)
      expect(described_class.call(path, file_set: file_set)).to eq(generated_files)
    end
  end
end

0 comments on commit b969541

Please sign in to comment.