diff --git a/lib/derivative_rodeo/storage_locations/base_location.rb b/lib/derivative_rodeo/storage_locations/base_location.rb
index c1b6960..7c698f4 100644
--- a/lib/derivative_rodeo/storage_locations/base_location.rb
+++ b/lib/derivative_rodeo/storage_locations/base_location.rb
@@ -208,7 +208,7 @@ def derived_file_from(template:)
 
       ##
       # When you have a known location and want to check for files that are within that location,
-      # use the #globbed_tail_locations method. In the case of {Generators::PdfSplitGenerator} we
+      # use the {#globbed_tail_locations} method. In the case of {Generators::PdfSplitGenerator} we
       # need to know the path to the all of the image files we "split" off of the given PDF.
       #
       # We can use the :file_path as the prefix the given :tail_glob as the suffix for a "fully
@@ -216,8 +216,8 @@ def derived_file_from(template:)
       #
       # @param tail_glob [String]
       #
-      # @return [StorageLocations::BaseLocation] when there is one or more files at the location
-      # @return [NilClass] when there are no files
+      # @return [Enumerable<StorageLocations::BaseLocation>] the locations of the files; an empty
+      #         array when there are none.
       def globbed_tail_locations(tail_glob:)
-        raise NotImplementedError, "#{self.class}#globbed_locations"
+        raise NotImplementedError, "#{self.class}#globbed_tail_locations"
       end
diff --git a/lib/derivative_rodeo/storage_locations/s3_location.rb b/lib/derivative_rodeo/storage_locations/s3_location.rb
index 17caa13..9dca524 100644
--- a/lib/derivative_rodeo/storage_locations/s3_location.rb
+++ b/lib/derivative_rodeo/storage_locations/s3_location.rb
@@ -66,6 +66,8 @@ def exist?
       #
       # @note S3 allows searching on a prefix but does not allow for "wildcard" searches. We can
       #       use the components of the file_path to fake that behavior.
+      #
+      # @see Generators::PdfSplitGenerator#image_file_basename_template
       def globbed_tail_locations(tail_glob:)
         # file_path = "s3://blah/1234/hello-world/pages/*.tiff"
         #
@@ -75,11 +77,21 @@ def globbed_tail_locations(tail_glob:)
         # and miscolate two PDFs.
         #
         # file_path = "s3://blah/1234/hello-world/hello-world.pdf
-        # TODO: Should file_path be file_dir?
-        globname = File.join(file_path, tail_glob)
-        regexp = %r{#{File.extname(globname)}$}
+        globname = File.join(file_dir, tail_glob)
+        # Escape the extension so its leading "." is literal, and anchor at the very end of the key.
+        regexp = %r{#{Regexp.escape(File.extname(globname))}\z}
+
+        # NOTE: We're making some informed guesses, needing to include the fully qualified template
+        # based on both the key of the item in the bucket as well as the bucket's host.
+        uri = URI.parse(file_uri)
+        scheme_and_host = "#{uri.scheme}://#{uri.host}"
+
         bucket.objects(prefix: File.dirname(globname)).flat_map do |object|
-          derived_file_from(object.key) if object.key.match(regexp)
+          # Contribute nothing for non-matching keys so the result never contains nil.
+          next [] unless object.key.match(regexp)
+
+          template = File.join(scheme_and_host, object.key)
+          derived_file_from(template: template)
         end
       end
 
diff --git a/lib/spec/support/aws_s3_faux_bucket.rb b/lib/spec_support/aws_s3_faux_bucket.rb
similarity index 100%
rename from lib/spec/support/aws_s3_faux_bucket.rb
rename to lib/spec_support/aws_s3_faux_bucket.rb
diff --git a/spec/derivative_rodeo/storage_locations/s3_location_spec.rb b/spec/derivative_rodeo/storage_locations/s3_location_spec.rb
index 2785f99..0269d1a 100644
--- a/spec/derivative_rodeo/storage_locations/s3_location_spec.rb
+++ b/spec/derivative_rodeo/storage_locations/s3_location_spec.rb
@@ -10,6 +10,7 @@
   before do
     # Let's use a FakeBucket instead!
     subject.use_actual_s3_bucket = false
+
     DerivativeRodeo.config do |config|
       config.aws_s3_bucket = 'fake-bucket'
       config.aws_s3_access_key_id = "FAKEFAKEFAKE"
@@ -66,11 +67,22 @@
 
   describe '#globbed_tail_locations' do
     it 'searched the bucket' do
-      basename_ish = short_path.split(".").first
-      key = File.join(basename_ish, File.basename(__FILE__))
+      # Because we instantiated the subject as a location to the :file_path (e.g. let(:file_path))
+      # we are encoding where things are relative to this file. In other words, this logic is
+      # mirroring the generator logic that says where we're writing derivatives relative to their
+      # original file/input file.
+      bucket_dir = "files/#{File.basename(file_path, '.tiff')}"
+
+      basename = File.basename(__FILE__)
+      key = File.join(bucket_dir, "pages", basename)
       subject.bucket.object(key).upload_file(__FILE__)
 
-      subject.globbed_tail_locations(tail_glob: "*.rb")
+      non_matching_key = File.join(bucket_dir, "missing", basename)
+      subject.bucket.object(non_matching_key).upload_file(__FILE__)
+
+      locations = subject.globbed_tail_locations(tail_glob: "ocr_color/pages/*.rb")
+
+      expect(locations.size).to eq(1)
     end
   end
 end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index b73cf0b..db37dbc 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -36,7 +36,7 @@
 # of increasing the boot-up time by auto-requiring all files in the support
 # directory. Alternatively, in the individual `*_spec.rb` files, manually
 # require only the support files necessary.
-Dir.glob(File.expand_path("../lib/spec/support/**/*.rb", __dir__)).each { |f| require f }
+Dir.glob(File.expand_path("../lib/spec_support/**/*.rb", __dir__)).each { |f| require f }
 Dir.glob(File.expand_path('./support/**/*.rb', __dir__)).each { |f| require f }
 
 RSpec.configure do |config|