# Checks that a user-supplied AIP path is usable before a task acts on it.
#
# On failure, prints one red Rainbow-formatted message describing the first
# problem found and returns false. The string-only checks run before the
# filesystem checks so the most specific message wins (e.g. a missing path
# that also contains "//" is reported as a double-slash problem).
#
# @param aip_path [String, nil] path to the AIP, as passed on the command line
# @return [Boolean] true when the path passes every check, false otherwise
#
# NOTE(review): a `def` inside a Rake `namespace` block is not scoped to that
# namespace; Ruby defines it as a private method on Object. Consider moving
# this into a module if that global visibility is not wanted.
def aip_path_is_valid?(aip_path)
  problem =
    if aip_path.nil? || aip_path.to_s.empty?
      # Callers already guard against a blank path; this keeps the helper
      # safe (no TypeError from File.exist?(nil)) when called on its own.
      'Missing required argument: aip_path'
    elsif aip_path == '/'
      "Error: Invalid AIP path: #{aip_path}"
    elsif aip_path.include?('//')
      # Presumably rejected because the path is later used as a literal
      # prefix in `path LIKE` queries -- TODO confirm.
      "Error: The AIP path you entered contains a double slash (//): #{aip_path}"
    elsif !File.exist?(aip_path)
      "Error: AIP not found at path: #{aip_path}"
    elsif !File.directory?(aip_path)
      # The status task enumerates the AIP with Dir.glob(File.join(aip_path, '**', '*')),
      # so a regular file can never be a usable AIP path.
      "Error: AIP path is not a directory: #{aip_path}"
    end

  return true if problem.nil?

  puts Rainbow(problem).red.bright
  false
end
- puts "Missing required argument: aip_path" + puts Rainbow("Missing required argument: aip_path").red.bright next end + next unless aip_path_is_valid?(aip_path) puts Rainbow("\nChecking on the status of SourceObjects with a path starting with: #{aip_path} ...").blue.bright puts "(this can be a slow process)\n\n" - puts "------------------------------" + + puts "-----------------------------" + puts "| Results |" + puts "-----------------------------" time = Benchmark.measure do number_of_local_files = Dir.glob(File.join(aip_path, '**', '*')).select { |file| File.file?(file) }.count @@ -67,12 +88,13 @@ namespace :atc do puts "-> This number should match the next number, which will be the number of SourceObject in the ATC database.\n\n" source_object_count = SourceObject.where('path LIKE ?', "#{aip_path}%").count - puts Rainbow("SourceObjects: #{source_object_count}").blue.bright + puts Rainbow("SourceObjects added to ATC database: #{source_object_count}").blue.bright puts "-> SourceObjects should equal the number of files in the AIP (#{Rainbow(number_of_local_files).blue.bright}).\n\n" if number_of_local_files != source_object_count - puts Rainbow("ERROR: There was a mismatch between the number of files on the filesystem and the number of SourceObjects in the database!").red.bright - puts Rainbow("That's bad! This should be investigated further!\n").red.bright + puts Rainbow("ERROR: There was a mismatch between the number of files on the filesystem and the number of SourceObjects in the ATC database!").red.bright + puts Rainbow("That's bad! This requires investigation!").red.bright + next end # # NOTE: The section below is currently commented out because it might not actually be helpful @@ -98,36 +120,51 @@ namespace :atc do # puts "Changes in these numbers are only an indication that the transfer is in progress. 
\n\n" # Check to see how many of the AIP files have StoredObject records - { - 'AWS' => 0, # storage_type 0 is AWS - 'GCP' => 1 # storage_type 1 is GCP - }.each do |storage_provider_type_name, storage_provider_type_value| - count = StoredObject.where( - 'storage_provider_id IN (SELECT id FROM storage_providers WHERE storage_type = ?) '\ - 'AND '\ - 'source_object_id IN (SELECT id from source_objects WHERE path LIKE ?)', - storage_provider_type_value, - "#{aip_path}%" - ).count - puts Rainbow("#{storage_provider_type_name} StoredObjects: #{count}").blue.bright - end + aws_stored_object_count = StoredObject.where( + 'storage_provider_id IN (SELECT id FROM storage_providers WHERE storage_type = ?) '\ + 'AND '\ + 'source_object_id IN (SELECT id from source_objects WHERE path LIKE ?)', + 0, + "#{aip_path}%" + ).count + puts Rainbow("AWS StoredObjects: #{aws_stored_object_count}").blue.bright + + gcp_stored_object_count = StoredObject.where( + 'storage_provider_id IN (SELECT id FROM storage_providers WHERE storage_type = ?) '\ + 'AND '\ + 'source_object_id IN (SELECT id from source_objects WHERE path LIKE ?)', + 1, + "#{aip_path}%" + ).count + puts Rainbow("GCP StoredObjects: #{gcp_stored_object_count}").blue.bright + puts "-> AWS and GCP StoredObject counts should equal the number of files in the AIP (#{Rainbow(number_of_local_files).blue.bright}) when all transfers have completed.\n\n" # Check to see how many of the AIP files have AWS FixityVerification records # NOTE: we only do fixity verifications on AWS records at this time. 
- grouped_counts = FixityVerification.where( + grouped_status_counts = FixityVerification.where( 'source_object_id IN (SELECT id from source_objects WHERE path LIKE ?)', "#{aip_path}%" ).group(:status).count puts Rainbow('FixityVerifications:').blue.bright FixityVerification.statuses.keys.each do |status| - count = grouped_counts[status] - puts Rainbow("#{status}: #{count.to_i}").blue.bright + puts Rainbow("#{status}: #{grouped_status_counts[status].to_i}").blue.bright end - puts "-> FixityVerification success count should equal the number of files in the AIP (#{Rainbow(number_of_local_files).blue.bright}) when all transfers have completed fixity verification, and there should be 0 failures.\n" + puts "-> FixityVerification success count should equal the number of files in the AIP (#{Rainbow(number_of_local_files).blue.bright}) when all transfers have completed fixity verification, and there should be 0 failures.\n\n" + + # Print summary (and any warnings) + + puts "-----------------------------" + puts "| Summary |" + puts "-----------------------------" + + puts "Local file count (#{Rainbow(number_of_local_files).blue.bright}) matches ATC DB SourceObject count (#{Rainbow(source_object_count).blue.bright})? #{number_of_local_files == source_object_count ? Rainbow("YES").green : Rainbow("NO").red.bright}" + puts "AWS transfers complete? #{number_of_local_files == aws_stored_object_count ? Rainbow("YES").green : Rainbow("NO").red.bright}" + puts "GCP transfers complete? #{number_of_local_files == gcp_stored_object_count ? Rainbow("YES").green : Rainbow("NO").red.bright}" + puts "Fixity verifications complete? #{number_of_local_files == grouped_status_counts['success'] ? Rainbow("YES").green : Rainbow("NO").red.bright}" end - puts "------------------------------\n\n" - puts "Status check finished in #{time.real.round(2)} seconds.\n\n" + + puts "\nStatus check finished in #{time.real.round(2)} seconds.\n\n" end end end