Skip to content
This repository has been archived by the owner on Jul 24, 2021. It is now read-only.

Commit

Permalink
Merge pull request #853 from joyent/ether/v2-collapse-validation_result
Browse files Browse the repository at this point in the history
v2 script to collapse validation_result rows together with "identical" data
  • Loading branch information
karenetheridge authored Aug 7, 2019
2 parents bc562fe + 7f383b3 commit 8521644
Show file tree
Hide file tree
Showing 3 changed files with 208 additions and 0 deletions.
20 changes: 20 additions & 0 deletions docs/modules/Conch::Command::merge_validation_results.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# NAME

merge\_validation\_results - collapse duplicate validation\_result rows together

# SYNOPSIS

```
bin/conch merge_validation_results [long options...]
-n --dry-run dry-run (no changes are made)
--help print usage message and exit
```

# LICENSING

Copyright Joyent, Inc.

This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at [http://mozilla.org/MPL/2.0/](http://mozilla.org/MPL/2.0/).
187 changes: 187 additions & 0 deletions lib/Conch/Command/merge_validation_results.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
package Conch::Command::merge_validation_results;

=pod
=head1 NAME
merge_validation_results - collapse duplicate validation_result rows together
=head1 SYNOPSIS
bin/conch merge_validation_results [long options...]
-n --dry-run dry-run (no changes are made)
--help print usage message and exit
=cut

use Mojo::Base 'Mojolicious::Command', -signatures;
use Getopt::Long::Descriptive;
use Try::Tiny;
use Data::Page;

has description => 'Collapse duplicate validation_result rows together';

has usage => sub { shift->extract_usage }; # extracts from SYNOPSIS

has 'dry_run';

sub run ($self, @opts) {
local @ARGV = @opts;
my ($opt, $usage) = describe_options(
# the descriptions aren't actually used anymore (mojo uses the synopsis instead)... but
# the 'usage' text block can be accessed with $usage->text
'merge_validation_results %o',
[ 'dry-run|n', 'dry-run (no changes are made)' ],
[],
[ 'help', 'print usage message and exit', { shortcircuit => 1 } ],
);

# ACHTUNG! only run this after migration 92 is done,
# because otherwise these queries will take even longer.

$self->dry_run($opt->dry_run);

# enable autoflush
my $prev = select(STDOUT); $|++; select($prev);

say Conch::Time->now, ' working'.($self->dry_run ? ' (in dry-run mode)' : '').'...';
my $schema = ($self->dry_run ? $self->app->ro_schema : $self->app->schema);

say 'At start, there are '
.$schema->resultset('validation_result')->count
.' validation_result rows.';
say '';

my ($validation_results_deleted, $device_count) = (0)x2;

# consider each device, oldest devices first, in pages of 100 rows each
my $device_rs = $schema->resultset('device')
->active
->rows(100)
->page(1)
->order_by('created');

foreach my $page (1 .. $device_rs->pager->last_page) {
$device_rs = $device_rs->page($page);
while (my $device = $device_rs->next) {
# we process each device's reports in a separate transaction,
# so we can abort and resume without redoing everything all over again
try {
$validation_results_deleted += $schema->txn_do(sub {
$self->_process_device($device);
});
++$device_count;
}
catch {
if (/Rollback failed/) {
local $@ = $_;
die; # propagate the error
}
print STDERR "\n", 'aborted processing of device ', $device->id, ': ', $_, "\n";
};
}
}

say '';
say Conch::Time->now, ' done.';
say '';
say $device_count.' devices processed.';
say $validation_results_deleted.' validation_result rows '
.($self->dry_run ? 'would be ' : '') .'deleted.';
say 'there are now '.$schema->resultset('validation_result')->count.' validation_result rows.';
}


sub _process_device ($self, $device) {
print 'device id ', $device->id, ': ';

my @grouping_cols = qw(device_id hardware_product_id validation_id message hint status category component_id result_order);

my $schema = $self->app->schema;
my ($group_count, $results_deleted) = (0)x2;

# find groups of validation_result rows that share identical column
# values that we have an index on (where new validation_states point
# to existing rows: see the end of Conch::ValidationSystem::run_validation_plan.
# Consider these groups in pages of 100 rows each.
my $groups_to_merge_rs = $schema->resultset('validation_result')
->columns(\@grouping_cols)
->search(
{ device_id => $device->id },
{ '+select' => [{ count => '*', -as => 'count' }] })
->group_by(\@grouping_cols)
->as_subselect_rs
->search({ count => { '>' => 1 } })
->columns(\@grouping_cols)
->order_by(\@grouping_cols)
->rows(100)
->page(1)
->hri;

foreach my $page (1 .. $groups_to_merge_rs->pager->last_page) {
$groups_to_merge_rs = $groups_to_merge_rs->page($page);

# foreach matching set,
# iterate through all matching rows oldest-first
# save the oldest one,
# delete the rest, updating validation_state_member to point to the oldest.
while (my $sample_result = $groups_to_merge_rs->next) {
++$group_count;
print '.' if $group_count % 100 == 0;

# all results in this resultset share the same values and can be collapsed down to
# a single result row
my $member_rs = $schema->resultset('validation_result')
->search({ $sample_result->%{@grouping_cols} });

if ($self->dry_run) {
$results_deleted += $member_rs->count - 1;
next;
}

# this is the validation_result row we keep - the oldest in this group
my $oldest_member_id = $member_rs
->order_by('created')
->rows(1)
->hri
->get_column('id')
->single;

# all validation_state_members pointing to results in this group should instead
# reference the oldest result in the group
$schema->resultset('validation_state_member')
->search(
{
validation_result_id => { '!=' => $oldest_member_id },
(map +('validation_result.'.$_ => $sample_result->{$_}), @grouping_cols),
},
{ join => 'validation_result' },
)
->update({ validation_result_id => $oldest_member_id });

# delete all the newly-orphaned validation_result rows
# (this is safe because we don't have a cascade delete on result -> member yet)
$results_deleted += $member_rs->search({ id => { '!=' => $oldest_member_id } })->delete;
}
}

say ' '.$results_deleted.' validation_results deleted';
return $results_deleted;
}

1;
__END__
=pod
=head1 LICENSING
Copyright Joyent, Inc.
This Source Code Form is subject to the terms of the Mozilla Public License,
v.2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at L<http://mozilla.org/MPL/2.0/>.
=cut
# vim: set ts=4 sts=4 sw=4 et :
1 change: 1 addition & 0 deletions lib/Conch/DB/ResultSet.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ __PACKAGE__->load_components(
'Helper::ResultSet::CorrelateRelationship', # provides correlate
'Helper::ResultSet::Shortcut::AddColumns', # provides add_columns
'+Conch::DB::AsEpoch', # provides as_epoch
'Helper::ResultSet::Shortcut::GroupBy', # provides group_by
);

1;
Expand Down

0 comments on commit 8521644

Please sign in to comment.