Skip to content

Commit

Permalink
Add atc:aip:status task for checking the status of a recently-loaded AIP
Browse files Browse the repository at this point in the history
  • Loading branch information
elohanlon committed Nov 5, 2024
1 parent ce54149 commit 825a15f
Showing 1 changed file with 81 additions and 0 deletions.
81 changes: 81 additions & 0 deletions lib/tasks/atc/aip.rake
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ namespace :atc do
aip_path = ENV['path']
dry_run = ENV['dry_run'] == 'true'

if aip_path.blank?
puts "Missing required argument: aip_path"
next
end

aip_reader = Atc::AipReader.new(aip_path, verbose: true)

# Identify checksum type for this AIP (sha256, sha512, or md5) and retrieve the associated ChecksumAlgorithm object
Expand Down Expand Up @@ -42,5 +47,81 @@ namespace :atc do
rescue Atc::Exceptions::AipLoadError => e
puts "An error has occurred (#{e.class.name}):\n" + Rainbow(e.message).red
end

# Reports how far a previously loaded AIP has progressed through ATC.
#
# Invoked as the atc:aip:status task with the AIP directory supplied in the `path`
# environment variable, e.g. `rake atc:aip:status path=/path/to/aip`.
#
# Prints, in order:
#   1. the number of regular files found under the AIP directory,
#   2. the number of SourceObject rows whose path lies inside that directory
#      (with an error banner when 1 and 2 differ),
#   3. PendingTransfer and StoredObject counts per storage provider type,
#   4. FixityVerification counts per status,
# followed by the elapsed wall-clock time. The task only runs count queries and
# prints their results.
desc 'Check the status of an AIP that was previously loaded.'
task status: :environment do
  aip_path = ENV['path']

  if aip_path.blank?
    # The value is read from ENV['path'], so `path` is the name the user has to supply.
    puts 'Missing required argument: path'
    next
  end

  puts Rainbow("\nChecking on the status of SourceObjects with a path starting with: #{aip_path} ...").blue.bright
  puts "(this can be a slow process)\n\n"
  puts "------------------------------"

  # storage_providers.storage_type values, keyed by the label used in the report.
  storage_types = {
    'AWS' => 0, # storage_type 0 is AWS
    'GCP' => 1 # storage_type 1 is GCP
  }

  # LIKE pattern that matches every path inside the AIP directory:
  # - File.join(aip_path, '') ends the prefix with a path separator, so a sibling directory that merely
  #   shares the prefix (e.g. /aips/a1 vs. /aips/a10) is not counted;
  # - sanitize_sql_like escapes "%" and "_" so they match literally instead of acting as wildcards.
  #   "!" is declared explicitly as the escape character (ESCAPE '!') so the query does not rely on
  #   the database's default LIKE escape character.
  path_pattern = "#{ActiveRecord::Base.sanitize_sql_like(File.join(aip_path, ''), '!')}%"
  path_condition = "path LIKE ? ESCAPE '!'"
  in_aip_condition = "source_object_id IN (SELECT id FROM source_objects WHERE #{path_condition})"

  # Prints one "<provider> <label>: <count>" line per storage type for the given model,
  # counting only rows that belong to SourceObjects inside the AIP.
  print_counts_per_storage_type = lambda do |model, label|
    storage_types.each do |storage_type_name, storage_type_value|
      count = model.where(
        'storage_provider_id IN (SELECT id FROM storage_providers WHERE storage_type = ?) ' \
        "AND #{in_aip_condition}",
        storage_type_value,
        path_pattern
      ).count
      puts Rainbow("#{storage_type_name} #{label}: #{count}").blue.bright
    end
  end

  time = Benchmark.measure do
    # `base:` keeps glob metacharacters in aip_path ("[", "{", "*", "?") from being read as pattern syntax.
    # NOTE(review): hidden files (dotfiles) are not matched by '**/*' -- confirm the loader skips them too.
    number_of_local_files = Dir.glob('**/*', base: aip_path).count do |relative_path|
      File.file?(File.join(aip_path, relative_path))
    end
    puts Rainbow("Number of files found in the AIP: #{number_of_local_files}").blue.bright
    puts "-> This number should match the next number, which will be the number of SourceObject in the ATC database.\n\n"

    source_object_count = SourceObject.where(path_condition, path_pattern).count
    puts Rainbow("SourceObjects: #{source_object_count}").blue.bright
    puts "-> SourceObjects should equal number of files in the AIP.\n\n"

    if number_of_local_files != source_object_count
      puts Rainbow("ERROR: There was a mismatch between the number of files on the filesystem and the number of SourceObjects in the database!").red.bright
      puts Rainbow("That's bad! This should be investigated further!\n").red.bright
    end

    # Check to see how many of the AIP files have PendingTransfer records
    print_counts_per_storage_type.call(PendingTransfer, 'PendingTransfers')
    puts "-> PendingTransfers should equal 0 when all pre-transfer checksums have been calculated.\n\n"

    # Check to see how many of the AIP files have StoredObject records
    print_counts_per_storage_type.call(StoredObject, 'StoredObjects')
    puts "-> AWS and GCP StoredObject counts should equal #{source_object_count} when all transfers have completed.\n\n"

    # Check to see how many of the AIP files have AWS FixityVerification records
    # NOTE: we only do fixity verifications on AWS records at this time.
    grouped_counts = FixityVerification.where(in_aip_condition, path_pattern).group(:status).count
    puts Rainbow('FixityVerifications:').blue.bright
    # Iterate over every defined status (not just the ones present) so absent statuses print as 0.
    FixityVerification.statuses.each_key do |status|
      puts Rainbow("#{status}: #{grouped_counts[status].to_i}").blue.bright
    end
    puts "-> FixityVerification success count should equal #{source_object_count} when all transfers have completed fixity verification, and there should be 0 failures.\n"
  end
  puts "------------------------------\n\n"
  puts "Status check finished in #{time.real.round(2)} seconds.\n\n"
end
end
end

0 comments on commit 825a15f

Please sign in to comment.