diff --git a/lib/iiif_print.rb b/lib/iiif_print.rb index 38e6f9d6..a9d164ea 100644 --- a/lib/iiif_print.rb +++ b/lib/iiif_print.rb @@ -19,6 +19,7 @@ require "iiif_print/jobs/child_works_from_pdf_job" require "iiif_print/split_pdfs/base_splitter" require "iiif_print/split_pdfs/child_work_creation_from_pdf_service" +require "iiif_print/split_pdfs/derivative_rodeo_splitter" module IiifPrint extend ActiveSupport::Autoload diff --git a/lib/iiif_print/jobs/child_works_from_pdf_job.rb b/lib/iiif_print/jobs/child_works_from_pdf_job.rb index fef883ce..5eea0ad1 100644 --- a/lib/iiif_print/jobs/child_works_from_pdf_job.rb +++ b/lib/iiif_print/jobs/child_works_from_pdf_job.rb @@ -15,17 +15,19 @@ def perform(candidate_for_parency, pdf_paths, user, admin_set_id, *) # We know that we have cases where parent_work is nil, this will definitely raise an # exception; which is fine because we were going to do it later anyway. @parent_work = if candidate_for_parency.work? + pdf_file_set = nil candidate_for_parency else # We likely have a file set + pdf_file_set = candidate_for_parency IiifPrint.parent_for(candidate_for_parency) end @child_admin_set_id = admin_set_id child_model = @parent_work.iiif_print_config.pdf_split_child_model - # handle each input pdf + # handle each input pdf (when input is a file set, we will only have one). pdf_paths.each do |original_pdf_path| - split_pdf(original_pdf_path, user, child_model) + split_pdf(original_pdf_path, user, child_model, pdf_file_set) end # Link newly created child works to the parent @@ -47,10 +49,8 @@ def perform(candidate_for_parency, pdf_paths, user, admin_set_id, *) private # rubocop:disable Metrics/ParameterLists - def split_pdf(original_pdf_path, user, child_model) - # TODO: This is the place to change out the existing service and instead use the derivative - # rodeo; we will likely need to look at method signatures to tighten this interface. 
- image_files = @parent_work.iiif_print_config.pdf_splitter_service.call(original_pdf_path) + def split_pdf(original_pdf_path, user, child_model, pdf_file_set) + image_files = @parent_work.iiif_print_config.pdf_splitter_service.call(original_pdf_path, file_set: pdf_file_set) return if image_files.blank? prepare_import_data(original_pdf_path, image_files, user) diff --git a/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb b/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb new file mode 100644 index 00000000..9745cbff --- /dev/null +++ b/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb @@ -0,0 +1,42 @@ +module IiifPrint + module SplitPdfs + class DerivativeRodeoSplitter + ## + # This class wraps the DerivativeRodeo::Generators::PdfSplitGenerator to find preprocessed + # images, or split a PDF if there are no preprocessed images. + # + # TODO: override output extension from default "tiff" in Derivative Rodeo + # TODO: define output_location_template & preprocessed_location_template + + ## + # @param _path [String] unused here, kept for consistent splitter method signature + # @param file_set [FileSet] file set containing a PDF file to split + # @return [Array] paths to images split from each page of PDF file + def self.call(_path, file_set:) + new(file_set: file_set).split_files + end + + def initialize(file_set:) + @path = IiifPrint::DerivativeRodeoService.derivative_rodeo_input_uri(file_set: file_set) + end + + def split_files + DerivativeRodeo::Generators::PdfSplitGenerator.new( + input_uris: @path, + output_location_template: template, + preprocessed_location_template: location + ).generated_files + end + + private + + def template + 'who knows' + end + + def location + 'who knows' + end + end + end +end diff --git a/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb b/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb new file mode 100644 index 00000000..e872f39d --- /dev/null +++ 
b/spec/iiif_print/split_pdfs/derivative_rodeo_splitter_spec.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe IiifPrint::SplitPdfs::DerivativeRodeoSplitter do + let(:path) { nil } + let(:work) { double(MyWork, aark_id: '12345') } + let(:file_set) { FileSet.new.tap { |fs| fs.save!(validate: false) } } + + describe 'class' do + subject { described_class } + + it { is_expected.to respond_to(:call) } + end + + describe "instance" do + subject { described_class.new(file_set: file_set) } + + before do + allow(file_set).to receive(:parent).and_return(work) + # TODO: This is a hack that leverages the internals of Hydra::Works; not excited about it but + # this part is only one piece of the overall integration. + allow(file_set).to receive(:original_file).and_return(double(original_filename: __FILE__)) + end + + it { is_expected.to respond_to :split_files } + + it 'uses the rodeo to split' do + expect(DerivativeRodeo::Generators::PdfSplitGenerator).to receive(:new) + described_class.call(path, file_set: file_set) + end + end +end