From d39f2d5b9a88afbbfc14f7e92879c5eb22d309ef Mon Sep 17 00:00:00 2001 From: Jeremy Friesen Date: Wed, 24 May 2023 14:52:04 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=81=20Adding=20IiifPrint::DerivativeRo?= =?UTF-8?q?deoService?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As written, this is a draft commit. Its purpose is to share the implementation details and how we might incorporate the DerivativeRodeo into the derivative generation. This draft will break the build as it does not include adding the DerivativeRodeo as a dependency. An ongoing challenge is that Valkyrie and ActiveFedora's Faraday requirements conflict with the DerivativeRodeo. I put this code forward with lots of comments and the beginnings of tests to ensure that we're on the right path. Related to: - https://github.com/scientist-softserv/iiif_print/issues/219 - https://github.com/scientist-softserv/iiif_print/pull/243 --- .../iiif_print/derivative_rodeo_service.rb | 128 ++++++++++++++++++ .../derivative_rodeo_service_spec.rb | 63 +++++++++ 2 files changed, 191 insertions(+) create mode 100644 app/services/iiif_print/derivative_rodeo_service.rb create mode 100644 spec/services/iiif_print/derivative_rodeo_service_spec.rb diff --git a/app/services/iiif_print/derivative_rodeo_service.rb b/app/services/iiif_print/derivative_rodeo_service.rb new file mode 100644 index 00000000..8e9081bf --- /dev/null +++ b/app/services/iiif_print/derivative_rodeo_service.rb @@ -0,0 +1,128 @@ +module IiifPrint + ## + # This class implements the interface of a Hyrax::DerivativeService. + # + # @see https://github.com/samvera/hyrax/blob/main/app/services/hyrax/derivative_service.rb Hyrax::DerivativesService + class DerivativeRodeoService + ## + # @!group Class Attributes + # + # @attr parent_work_identifier_property [String] the property we use to identify the unique + # identifier of the parent work as it went through the SpaceStone pre-process. + # + # TODO: The default of :aark_id is a quick hack for adventist. By exposing a configuration + # value, my hope is that this becomes easier to configure. + class_attribute :parent_work_identifier_property, default: 'aark_id' + + ## + # @attr input_location_adapter_name [String] The name of a derivative rodeo storage location; + # this will must be a registered with the DerivativeRodeo::StorageLocations::BaseLocation. + class_attribute :input_location_adapter_name, default: 's3' + + ## + # @attr named_derivatives_and_generators_by_type [Hash] the named derivative and it's + # associated generator. The "name" is important for Hyrax. The generator is one that + # exists in the DerivativeRodeo. + # + # TODO: Could be nice to have a registry for the DerivativeRodeo::Generators; but that's a + # tomorrow wish. + class_attribute(:named_derivatives_and_generators_by_type, default: { + pdf: { thumbnail: "DerivativeRodeo::Generators::ThumbnailGenerator" } + }) + # @!endgroup Class Attributes + ## + + ## + # This method "hard-codes" some existing assumptions about the input_uri based on + # implementations for Adventist. Those are reasonable assumptions but time will tell how + # reasonable. + # + # @param file_set [FileSet] + # @return [String] + def self.input_uri_for(file_set:) + # TODO: This is duplicated logic for another service, consider extracting a helper module; + # better yet wouldn't it be nice if Hyrax did this right and proper. + parent = file_set.parent || file_set.member_of.find(&:work?) + raise IiifPrint::DataError, "Parent not found for #{file_set.class} ID=#{file_set.id}" unless parent + + dirname = parent.public_send(parent_work_identifier_property) + + # TODO: This is a hack that knows about the inner workings of Hydra::Works, but for + # expendiency, I'm using it. See + # https://github.com/samvera/hydra-works/blob/c9b9dd0cf11de671920ba0a7161db68ccf9b7f6d/lib/hydra/works/services/add_file_to_file_set.rb#L49-L53 + # TODO: Could we get away with filename that is passed in the create_derivatives process? + filename = Hydra::Works::DetermineOriginalName.call(file_set.original_file) + + # TODO: What kinds of exceptions might we raise if the location is not configured? Do we need + # to "validate" it in another step. + location = DerivativeRodeo::StorageLocations::BaseLocation.load_location(input_location_adapter_name) + + # TODO: This is based on the provided output template in + # https://github.com/scientist-softserv/space_stone-serverless/blob/0dbe2b6fa13b9f4bf8b9580ec14d0af5c98e2b00/awslambda/bin/sample_post.rb#L1 + # and is very much hard-coded. We likely want to "store" the template in a common place for + # the application. + File.join(location.adapter_prefix, dirname, filename) + end + + def initialize(file_set) + @file_set = file_set + end + + attr_reader :file_set + delegate :uri, :mime_type, to: :file_set + + ## + # @return + # @see https://github.com/samvera/hyrax/blob/426575a9065a5dd3b30f458f5589a0a705ad7be2/app/services/hyrax/file_set_derivatives_service.rb#L18-L20 Hyrax::FileSetDerivativesService#valid? + def valid? + if in_the_rodeo? + Rails.logger.info("🤠🐮 Using the DerivativeRodeo for FileSet ID=#{file_set.id} with mime_type of #{mime_type}") + true + else + Rails.logger.info("Skipping the DerivativeRodeo for FileSet ID=#{file_set.id} with mime_type of #{mime_type}") + false + end + end + + ## + # @api public + def create_derivatives(filename) + # TODO: Do we need to handle "impending derivatives?" as per {IiifPrint::PluggableDerivativeService}? + case mime_type + when file_set.class.pdf_mime_types + lasso_up_some_derivatives(filename: filename, type: :pdf) + when file_set.class.image_mime_types + lasso_up_some_derivatives(filename: filename, type: :image) + else + # TODO: add more mime types but for now image and PDF are the two we accept. + raise "Unexpected mime_type #{mime_type} for filename #{filename}" + end + end + + private + + def lasso_up_some_derivatives(filename:, type:) + # TODO: Can we use the filename instead of the antics of the original_file on the file_set? + input_uri = self.class.input_uri_for(file_set: file_set, filename: filename) + named_derivatives_and_generators_by_type.fetch(type).flat_map do |named_derivative, generator_name| + output_location_template = "file://#{Hyrax::DerivativePath.derivative_path_for_reference(file_set, named_derivative)}" + generator = generator_name.constantize + generator.new(input_uris: [input_uri], output_location_template: output_location_template).generate_uris + end + end + + def supported_mime_types + # If we've configured the rodeo + named_derivatives_and_generators_by_type.keys.flat_map { |type| file_set.class.public_send("#{type}_mime_types") } + end + + def in_the_rodeo? + # We can assume that we are not going to process a supported mime type; and there is a cost + # for looking in the rodeo. + return false unless supported_mime_types.include?(mime_type) + + # TODO: Make it so! + raise NotImplementedError + end + end +end diff --git a/spec/services/iiif_print/derivative_rodeo_service_spec.rb b/spec/services/iiif_print/derivative_rodeo_service_spec.rb new file mode 100644 index 00000000..18bf26f5 --- /dev/null +++ b/spec/services/iiif_print/derivative_rodeo_service_spec.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe IiifPrint::DerivativeRodeoService do + let(:work) { double({ described_class.parent_work_identifier_property => 'hello-1234-id' }) } + let(:file_set) { FileSet.new.tap { |fs| fs.save!(validate: false) } } + let(:generator) { DerivativeRodeo::Generators::CopyGenerator } + let(:output_extension) { "rb" } + + subject(:klass) { described_class } + + describe '.input_location_adapter_name' do + subject { described_class.input_location_adapter_name } + it { is_expected.to eq 's3' } + end + + describe '.parent_work_identifier_property' do + subject { described_class.parent_work_identifier_property } + it { is_expected.to be_a String } + end + + describe '.named_derivatives_and_generators_by_type' do + subject { described_class.named_derivatives_and_generators_by_type } + it { is_expected.to be_a Hash } + end + + describe '.input_uri_for' do + subject { described_class.input_uri_for(file_set: file_set) } + + context 'when the file_set does not have a parent' do + it 'is expected to raise an error' do + expect { subject }.to raise_error(IiifPrint::DataError) + end + end + + context 'when the file_set has a parent' do + before do + allow(file_set).to receive(:parent).and_return(work) + # TODO: This is a hack that leverages the internals of Hydra::Works; not excited about it but + # this part is only one piece of the over all integration. + allow(file_set).to receive(:original_file).and_return(double(original_filename: __FILE__)) + end + + it { is_expected.to start_with("#{described_class.input_location_adapter_name}://") } + it { is_expected.to end_with(File.basename(__FILE__)) } + end + end + + xdescribe '#valid?' do + context 'when the mime_type of the file is not supported' do + it { is_expected.to be_falsey } + end + + context 'when derivative rodeo has not pre-processed the file set' do + it { is_expected.to be_falsey } + end + + context 'when the mime type is supported and the derivative rodeo has pre-processed the file set' do + it { is_expected.to be_truthy } + end + end +end