diff --git a/lib/tasks/atc/aip.rake b/lib/tasks/atc/aip.rake
index 660a28d..c12f6f3 100644
--- a/lib/tasks/atc/aip.rake
+++ b/lib/tasks/atc/aip.rake
@@ -5,6 +5,11 @@ namespace :atc do
       aip_path = ENV['path']
       dry_run = ENV['dry_run'] == 'true'
 
+      if aip_path.blank?
+        puts "Missing required argument: path"
+        next
+      end
+
       aip_reader = Atc::AipReader.new(aip_path, verbose: true)
 
       # Identify checksum type for this AIP (sha256, sha512, or md5) and retrieve the associated ChecksumAlgorithm object
@@ -42,5 +47,85 @@ namespace :atc do
     rescue Atc::Exceptions::AipLoadError => e
       puts "An error has occurred (#{e.class.name}):\n" + Rainbow(e.message).red
     end
+
+    desc 'Check the status of an AIP that was previously loaded.'
+    task status: :environment do
+      # Prints a progress report for the AIP at ENV['path'] (pass it as path=/path/to/aip): the number of files
+      # on disk, followed by the SourceObject, PendingTransfer, StoredObject and FixityVerification counts for
+      # source objects whose path starts with that same value.
+      aip_path = ENV['path']
+
+      if aip_path.blank?
+        puts "Missing required argument: path"
+        next
+      end
+
+      puts Rainbow("\nChecking on the status of SourceObjects with a path starting with: #{aip_path} ...").blue.bright
+      puts "(this can be a slow process)\n\n"
+      puts "------------------------------"
+
+      # Escape LIKE wildcards so that '%' and '_' in the path (underscores are common in file names) are matched
+      # literally. '!' is declared as the escape character via an explicit ESCAPE clause, so the pattern does
+      # not rely on the database's default escape character.
+      path_prefix_pattern = "#{SourceObject.sanitize_sql_like(aip_path, '!')}%"
+      source_objects_under_path_sql = "SELECT id FROM source_objects WHERE path LIKE ? ESCAPE '!'"
+      storage_types = {
+        'AWS' => 0, # storage_type 0 is AWS
+        'GCP' => 1 # storage_type 1 is GCP
+      }
+      # Shared by the PendingTransfer and StoredObject queries: rows for one provider type, limited to this AIP.
+      provider_and_path_condition =
+        'storage_provider_id IN (SELECT id FROM storage_providers WHERE storage_type = ?) ' \
+        "AND source_object_id IN (#{source_objects_under_path_sql})"
+
+      time = Benchmark.measure do
+        # NOTE(review): Dir.glob skips dotfiles unless File::FNM_DOTMATCH is passed, and the database prefix match
+        # below also matches sibling directories that share this prefix -- confirm both are intended.
+        number_of_local_files = Dir.glob(File.join(aip_path, '**', '*')).count { |file| File.file?(file) }
+        puts Rainbow("Number of files found in the AIP: #{number_of_local_files}").blue.bright
+        puts "-> This number should match the next number, which will be the number of SourceObject in the ATC database.\n\n"
+
+        source_object_count = SourceObject.where("path LIKE ? ESCAPE '!'", path_prefix_pattern).count
+        puts Rainbow("SourceObjects: #{source_object_count}").blue.bright
+        puts "-> SourceObjects should equal number of files in the AIP.\n\n"
+
+        if number_of_local_files != source_object_count
+          puts Rainbow("ERROR: There was a mismatch between the number of files on the filesystem and the number of SourceObjects in the database!").red.bright
+          puts Rainbow("That's bad! This should be investigated further!\n").red.bright
+        end
+
+        # Check to see how many of the AIP files have PendingTransfer records
+        storage_types.each do |storage_provider_type_name, storage_provider_type_value|
+          count = PendingTransfer.where(
+            provider_and_path_condition, storage_provider_type_value, path_prefix_pattern
+          ).count
+          puts Rainbow("#{storage_provider_type_name} PendingTransfers: #{count}").blue.bright
+        end
+        puts "-> PendingTransfers should equal 0 when all pre-transfer checksums have been calculated.\n\n"
+
+        # Check to see how many of the AIP files have StoredObject records
+        storage_types.each do |storage_provider_type_name, storage_provider_type_value|
+          count = StoredObject.where(
+            provider_and_path_condition, storage_provider_type_value, path_prefix_pattern
+          ).count
+          puts Rainbow("#{storage_provider_type_name} StoredObjects: #{count}").blue.bright
+        end
+        puts "-> AWS and GCP StoredObject counts should equal #{source_object_count} when all transfers have completed.\n\n"
+
+        # Check to see how many of the AIP files have AWS FixityVerification records
+        # NOTE: we only do fixity verifications on AWS records at this time.
+        grouped_counts = FixityVerification.where(
+          "source_object_id IN (#{source_objects_under_path_sql})", path_prefix_pattern
+        ).group(:status).count
+        puts 'FixityVerifications:'
+        FixityVerification.statuses.each_key do |status|
+          # Statuses with no matching rows are absent from the grouped result, so report them as 0.
+          puts Rainbow("#{status}: #{grouped_counts.fetch(status, 0)}").blue.bright
+        end
+        puts "-> FixityVerification success count should equal #{source_object_count} when all transfers have completed fixity verification, and there should be 0 failures.\n"
+      end
+      puts "------------------------------\n\n"
+      puts "Status check finished in #{time.real.round(2)} seconds.\n\n"
+    end
   end
 end