diff --git a/app/services/iiif_print/derivative_rodeo_service.rb b/app/services/iiif_print/derivative_rodeo_service.rb new file mode 100644 index 00000000..8e9081bf --- /dev/null +++ b/app/services/iiif_print/derivative_rodeo_service.rb @@ -0,0 +1,128 @@ +module IiifPrint + ## + # This class implements the interface of a Hyrax::DerivativeService. + # + # @see https://github.com/samvera/hyrax/blob/main/app/services/hyrax/derivative_service.rb Hyrax::DerivativesService + class DerivativeRodeoService + ## + # @!group Class Attributes + # + # @attr parent_work_identifier_property [String] the property we use to identify the unique + # identifier of the parent work as it went through the SpaceStone pre-process. + # + # TODO: The default of :aark_id is a quick hack for adventist. By exposing a configuration + # value, my hope is that this becomes easier to configure. + class_attribute :parent_work_identifier_property, default: 'aark_id' + + ## + # @attr input_location_adapter_name [String] The name of a derivative rodeo storage location; + # this will must be a registered with the DerivativeRodeo::StorageLocations::BaseLocation. + class_attribute :input_location_adapter_name, default: 's3' + + ## + # @attr named_derivatives_and_generators_by_type [Hash] the named derivative and it's + # associated generator. The "name" is important for Hyrax. The generator is one that + # exists in the DerivativeRodeo. + # + # TODO: Could be nice to have a registry for the DerivativeRodeo::Generators; but that's a + # tomorrow wish. + class_attribute(:named_derivatives_and_generators_by_type, default: { + pdf: { thumbnail: "DerivativeRodeo::Generators::ThumbnailGenerator" } + }) + # @!endgroup Class Attributes + ## + + ## + # This method "hard-codes" some existing assumptions about the input_uri based on + # implementations for Adventist. Those are reasonable assumptions but time will tell how + # reasonable. + # + # @param file_set [FileSet] + # @return [String] + def self.input_uri_for(file_set:) + # TODO: This is duplicated logic for another service, consider extracting a helper module; + # better yet wouldn't it be nice if Hyrax did this right and proper. + parent = file_set.parent || file_set.member_of.find(&:work?) + raise IiifPrint::DataError, "Parent not found for #{file_set.class} ID=#{file_set.id}" unless parent + + dirname = parent.public_send(parent_work_identifier_property) + + # TODO: This is a hack that knows about the inner workings of Hydra::Works, but for + # expendiency, I'm using it. See + # https://github.com/samvera/hydra-works/blob/c9b9dd0cf11de671920ba0a7161db68ccf9b7f6d/lib/hydra/works/services/add_file_to_file_set.rb#L49-L53 + # TODO: Could we get away with filename that is passed in the create_derivatives process? + filename = Hydra::Works::DetermineOriginalName.call(file_set.original_file) + + # TODO: What kinds of exceptions might we raise if the location is not configured? Do we need + # to "validate" it in another step. + location = DerivativeRodeo::StorageLocations::BaseLocation.load_location(input_location_adapter_name) + + # TODO: This is based on the provided output template in + # https://github.com/scientist-softserv/space_stone-serverless/blob/0dbe2b6fa13b9f4bf8b9580ec14d0af5c98e2b00/awslambda/bin/sample_post.rb#L1 + # and is very much hard-coded. We likely want to "store" the template in a common place for + # the application. + File.join(location.adapter_prefix, dirname, filename) + end + + def initialize(file_set) + @file_set = file_set + end + + attr_reader :file_set + delegate :uri, :mime_type, to: :file_set + + ## + # @return + # @see https://github.com/samvera/hyrax/blob/426575a9065a5dd3b30f458f5589a0a705ad7be2/app/services/hyrax/file_set_derivatives_service.rb#L18-L20 Hyrax::FileSetDerivativesService#valid? + def valid? + if in_the_rodeo? + Rails.logger.info("🤠🐮 Using the DerivativeRodeo for FileSet ID=#{file_set.id} with mime_type of #{mime_type}") + true + else + Rails.logger.info("Skipping the DerivativeRodeo for FileSet ID=#{file_set.id} with mime_type of #{mime_type}") + false + end + end + + ## + # @api public + def create_derivatives(filename) + # TODO: Do we need to handle "impending derivatives?" as per {IiifPrint::PluggableDerivativeService}? + case mime_type + when file_set.class.pdf_mime_types + lasso_up_some_derivatives(filename: filename, type: :pdf) + when file_set.class.image_mime_types + lasso_up_some_derivatives(filename: filename, type: :image) + else + # TODO: add more mime types but for now image and PDF are the two we accept. + raise "Unexpected mime_type #{mime_type} for filename #{filename}" + end + end + + private + + def lasso_up_some_derivatives(filename:, type:) + # TODO: Can we use the filename instead of the antics of the original_file on the file_set? + input_uri = self.class.input_uri_for(file_set: file_set, filename: filename) + named_derivatives_and_generators_by_type.fetch(type).flat_map do |named_derivative, generator_name| + output_location_template = "file://#{Hyrax::DerivativePath.derivative_path_for_reference(file_set, named_derivative)}" + generator = generator_name.constantize + generator.new(input_uris: [input_uri], output_location_template: output_location_template).generate_uris + end + end + + def supported_mime_types + # If we've configured the rodeo + named_derivatives_and_generators_by_type.keys.flat_map { |type| file_set.class.public_send("#{type}_mime_types") } + end + + def in_the_rodeo? + # We can assume that we are not going to process a supported mime type; and there is a cost + # for looking in the rodeo. + return false unless supported_mime_types.include?(mime_type) + + # TODO: Make it so! + raise NotImplementedError + end + end +end diff --git a/spec/services/iiif_print/derivative_rodeo_service_spec.rb b/spec/services/iiif_print/derivative_rodeo_service_spec.rb new file mode 100644 index 00000000..18bf26f5 --- /dev/null +++ b/spec/services/iiif_print/derivative_rodeo_service_spec.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe IiifPrint::DerivativeRodeoService do + let(:work) { double({ described_class.parent_work_identifier_property => 'hello-1234-id' }) } + let(:file_set) { FileSet.new.tap { |fs| fs.save!(validate: false) } } + let(:generator) { DerivativeRodeo::Generators::CopyGenerator } + let(:output_extension) { "rb" } + + subject(:klass) { described_class } + + describe '.input_location_adapter_name' do + subject { described_class.input_location_adapter_name } + it { is_expected.to eq 's3' } + end + + describe '.parent_work_identifier_property' do + subject { described_class.parent_work_identifier_property } + it { is_expected.to be_a String } + end + + describe '.named_derivatives_and_generators_by_type' do + subject { described_class.named_derivatives_and_generators_by_type } + it { is_expected.to be_a Hash } + end + + describe '.input_uri_for' do + subject { described_class.input_uri_for(file_set: file_set) } + + context 'when the file_set does not have a parent' do + it 'is expected to raise an error' do + expect { subject }.to raise_error(IiifPrint::DataError) + end + end + + context 'when the file_set has a parent' do + before do + allow(file_set).to receive(:parent).and_return(work) + # TODO: This is a hack that leverages the internals of Hydra::Works; not excited about it but + # this part is only one piece of the over all integration. + allow(file_set).to receive(:original_file).and_return(double(original_filename: __FILE__)) + end + + it { is_expected.to start_with("#{described_class.input_location_adapter_name}://") } + it { is_expected.to end_with(File.basename(__FILE__)) } + end + end + + xdescribe '#valid?' do + context 'when the mime_type of the file is not supported' do + it { is_expected.to be_falsey } + end + + context 'when derivative rodeo has not pre-processed the file set' do + it { is_expected.to be_falsey } + end + + context 'when the mime type is supported and the derivative rodeo has pre-processed the file set' do + it { is_expected.to be_truthy } + end + end +end