diff --git a/app/services/iiif_print/derivative_rodeo_service.rb b/app/services/iiif_print/derivative_rodeo_service.rb index 7afe3d85..c177a0d9 100644 --- a/app/services/iiif_print/derivative_rodeo_service.rb +++ b/app/services/iiif_print/derivative_rodeo_service.rb @@ -47,9 +47,21 @@ class DerivativeRodeoService # implementations for Adventist. Those are reasonable assumptions but time will tell how # reasonable. # + # By convention, this method is returning output_location of the SpaceStone::Serverless + # processing. We might know the original location that SpaceStone::Serverless processed, but + # that seems to be a tenuous assumption. + # + # In other words, where would SpaceStone, by convention, have written the original file and by + # convention written that original file's derivatives. + # + # TODO: We also need to account for PDF splitting + # # @param file_set [FileSet] + # @param filename [String, nil] overrides the filename; when nil it is taken from file_set.original_file # @return [String] - def self.derivative_rodeo_input_uri(file_set:) + def self.derivative_rodeo_input_uri(file_set:, filename: nil) + # NOTE(review): this guard memoizes on the class itself; if the ivar is ever set, later calls + # would ignore their file_set: and filename: arguments. Confirm that is intended. return @derivative_rodeo_input_uri if defined?(@derivative_rodeo_input_uri) # TODO: URGENT For a child work (e.g. an image split off of a PDF) we will know that the file_set's @@ -70,7 +82,7 @@ def self.derivative_rodeo_input_uri(file_set:) # expendiency, I'm using it. See # https://github.com/samvera/hydra-works/blob/c9b9dd0cf11de671920ba0a7161db68ccf9b7f6d/lib/hydra/works/services/add_file_to_file_set.rb#L49-L53 # TODO: Could we get away with filename that is passed in the create_derivatives process? - filename = Hydra::Works::DetermineOriginalName.call(file_set.original_file) + filename ||= Hydra::Works::DetermineOriginalName.call(file_set.original_file) # TODO: What kinds of exceptions might we raise if the location is not configured? Do we need # to "validate" it in another step. 
diff --git a/lib/iiif_print/jobs/child_works_from_pdf_job.rb b/lib/iiif_print/jobs/child_works_from_pdf_job.rb index 5eea0ad1..e61b17b2 100644 --- a/lib/iiif_print/jobs/child_works_from_pdf_job.rb +++ b/lib/iiif_print/jobs/child_works_from_pdf_job.rb @@ -97,6 +97,13 @@ def prepare_import_data(original_pdf_path, image_files, user) PendingRelationship.create!(child_title: child_title, parent_id: @parent_work.id, child_order: child_title) + + begin + # Clean up the temporary image path. + File.delete(image_path) if File.exist?(image_path) + rescue SystemCallError + # If we can't delete, let's move on. Maybe it was already cleaned-up. + end end end # rubocop:enable Metrics/MethodLength diff --git a/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb b/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb index 9745cbff..3ba7d5df 100644 --- a/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb +++ b/lib/iiif_print/split_pdfs/derivative_rodeo_splitter.rb @@ -1,41 +1,75 @@ module IiifPrint module SplitPdfs + ## + # This class wraps the DerivativeRodeo::Generators::PdfSplitGenerator to find preprocessed + # images, or split a PDF if there are no preprocessed images. + # + # We have already attached the original file to the file_set. We want to convert that original + # file that's attached to an input_uri (e.g. "file://path/to/original-file" as in what we have + # written to Fedora as the PDF) + # + # @see .call class DerivativeRodeoSplitter ## - # This class wraps the DerivativeRodeo::Generators::PdfSplitGenerator to find preprocessed - # images, or split a PDF if there are no preprocessed images. 
- # - # TODO: override output extension from default "tiff" in Derivative Rodeo - # TODO: define output_location_template & preprocessed_location_template - - ## - # @param _path [String] unused here, kept for consistant splitter method signature + # @param path [String] the local file location # @param file_set [FileSet] file set containing a PDF file to split + # # @return [Array] paths to images split from each page of PDF file - def self.call(_path, file_set:) - new(file_set: file_set).split_files + def self.call(path, file_set:) + new(path, file_set: file_set).split_files end - def initialize(file_set:) - @path = IiifPrint::DerivativeRodeoService.derivative_rodeo_input_uri(file_set: file_set) - end + ## + # @param path [String] the local file location of the PDF to split + # @param file_set [FileSet] file set containing a PDF file to split + # @param output_tmp_dir [String] base directory under which the split images are written + def initialize(path, file_set:, output_tmp_dir: Dir.tmpdir) + @input_uri = "file://#{path}" - def split_files - DerivativeRodeo::Generators::PdfSplitGenerator.new( - input_uris: @path, - output_location_template: template, - preprocessed_location_template: location - ).generated_files + # We are writing the images to a location that CarrierWave can upload. + # + # https://github.com/scientist-softserv/iiif_print/blob/b969541de1a0526305b54de37bf7cf100289f088/lib/iiif_print/jobs/child_works_from_pdf_job.rb#L108 + output_template_path = File.join(output_tmp_dir, '{{ dir_parts[-1..-1] }}', '{{ filename }}') + + @output_location_template = "file://#{output_template_path}" + # Hand the basename of the given path to the service as the filename override. + @preprocessed_location_template = IiifPrint::DerivativeRodeoService.derivative_rodeo_input_uri(file_set: file_set, filename: File.basename(path)) end - private + ## + # This is where, in "Fedora" we have the original file. This is not the original file in the + # pre-processing location but instead the long-term location of the file in the application + # that mounts IIIF Print. + # + # @return [String] + attr_reader :input_uri - def template - 'who knows' - end + ## + # This is the location where we're going to write the derivatives that will "go into Fedora". 
+ # + # @return [String] + attr_reader :output_location_template + + ## + # Where can we find, in the DerivativeRodeo's storage, what has already been done regarding + # derivative generation. + # + # For example, SpaceStone::Serverless will pre-process derivatives and write them into an S3 + # bucket that we then use for IIIF Print. + # + # @return [String] + # + # @see https://github.com/scientist-softserv/space_stone-serverless/blob/7f46dd5b218381739cd1c771183f95408a4e0752/awslambda/handler.rb#L58-L63 + attr_reader :preprocessed_location_template - def location - 'who knows' + ## + # @return [Array] the paths to each of the images split off from the PDF. + def split_files + DerivativeRodeo::Generators::PdfSplitGenerator.new( + input_uris: [@input_uri], + output_location_template: output_location_template, + preprocessed_location_template: preprocessed_location_template + ).generated_files.map(&:file_path) end end end