Skip to content

Commit

Permalink
Cleanup logic improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
lpalovsky committed Aug 13, 2024
1 parent 1df4d5f commit c0e2d46
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 72 deletions.
81 changes: 63 additions & 18 deletions lib/sles4sap/sap_deployment_automation_framework/basetest.pm
Original file line number Diff line number Diff line change
Expand Up @@ -7,39 +7,84 @@
# Basetest used for Microsoft SDAF deployment

package sles4sap::sap_deployment_automation_framework::basetest;
use parent 'opensusebasetest';

use strict;
use warnings;
use testapi;
use parent 'opensusebasetest';
use Exporter qw(import);
use sles4sap::sap_deployment_automation_framework::deployment qw(sdaf_cleanup az_login load_os_env_variables);
use sles4sap::sap_deployment_automation_framework::deployment_connector qw(find_deployer_resources);
use sles4sap::console_redirection qw(connect_target_to_serial disconnect_target_from_serial);
use sles4sap::azure_cli qw(az_resource_delete);
use sles4sap::sap_deployment_automation_framework::deployment_connector
qw(find_deployer_resources destroy_deployer_vm get_deployer_vm_name find_deployment_id get_deployer_ip);
use sles4sap::console_redirection;

our @EXPORT = qw(full_cleanup);

sub post_fail_hook {
=head1 SYNOPSIS
Base test class for SDAF deployment. It provides the full cleanup routine (C<full_cleanup>) and a C<post_fail_hook> which calls it.
A post run hook is not needed, because cleanup must not be triggered at the end of each test module.
=cut

=head2 full_cleanup

    full_cleanup();

Function performs full SDAF cleanup. First, it checks which stages of deployment are applied to avoid executing
unnecessary cleanup commands. Cleanup is done in following order:

- execute SDAF remover script - destroys existing sap-systems and workload zone deployments
  (only if console redirection to the deployer VM can be established)

- destroy deployer VM and related resources like OS disk, NIC, Security group, etc.

- keeps control plane intact (Control plane must not be deleted)

Nothing is done if OpenQA variable B<SDAF_RETAIN_DEPLOYMENT> is set.

=cut

sub full_cleanup {
    if (get_var('SDAF_RETAIN_DEPLOYMENT')) {
        record_info('Cleanup OFF', 'OpenQA variable "SDAF_RETAIN_DEPLOYMENT" is active, skipping cleanup.');
        return;
    }

    # Disable any stray redirection being active. This resets the console to the worker VM.
    disconnect_target_from_serial if check_serial_redirection();
    az_login();

    # Check if deployer VM exists and collect required data.
    # Each lookup is skipped if the previous one returned nothing.
    my $deployment_id = find_deployment_id();
    my $deployer_vm_name = $deployment_id ? get_deployer_vm_name(deployment_id => $deployment_id) : undef;
    my $deployer_ip = $deployer_vm_name ? get_deployer_ip(deployer_vm_name => $deployer_vm_name) : undef;

    # If deployer exists, check if console redirection is possible
    my $redirection_works;
    if ($deployer_ip) {
        set_var('REDIRECT_DESTINATION_USER', get_var('PUBLIC_CLOUD_USER', 'azureadm'));
        set_var('REDIRECT_DESTINATION_IP', $deployer_ip);
        # Do not fail even if connection is not successful
        # NOTE(review): assumes connect_target_to_serial() returns a true value on success - confirm
        $redirection_works = connect_target_to_serial(fail_ok => '1');
    }
    my $sut_cleanup_message = $redirection_works ?
      'Console redirection works, proceeding with SUT cleanup' :
      'Console redirection to Deployer VM does not seem to work. Destroying SUT infrastructure is not possible.';
    record_info('SUT cleanup', $sut_cleanup_message);

    # Trigger SDAF remover script to destroy 'workload zone' and 'sap systems' resources
    # Clean up all config files, keys, etc.. on deployer VM
    # This is only possible from the deployer VM itself, therefore it requires working redirection.
    if ($redirection_works) {
        load_os_env_variables();
        az_login();
        sdaf_cleanup();
        disconnect_target_from_serial();    # Exit Deployer console since we are about to destroy it
    }
    # Do not make cleanup fail here, we still need to destroy deployer VM and its resources.
    record_info('SUT cleanup', 'Failed to set up redirection, skipping SDAF cleanup scripts.') unless $redirection_works;

    # Cleanup deployer VM resources only
    # Deployer VM is located in permanent deployer resource group. This RG **MUST STAY INTACT**
    # Destroys deployer VM and its resources
    destroy_deployer_vm();
}

az_resource_delete(ids => join(' ', @resource_cleanup_list),
resource_group => get_required_var('SDAF_DEPLOYER_RESOURCE_GROUP'), timeout => '600');
=head2 post_fail_hook

    $self->post_fail_hook();

Post fail hook. Records an info entry about the hook being executed and then runs C<full_cleanup()>.

=cut

sub post_fail_hook {
    record_info('Post fail', 'Executing post fail hook');
    return full_cleanup();
}

1;
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,8 @@ sub sdaf_execute_remover {
# SDAF must be executed from the profile directory, otherwise it will fail
assert_script_run("cd " . $tfvars_path);
record_info('SDAF destroy', "Executing SDAF remover:\n$remover_cmd");
my $rc = script_run($remover_cmd, timeout => 3600);
# Keep the timeout high, definitely above 1H. Azure tends to be slow.
my $rc = script_run($remover_cmd, timeout => 7200);
upload_logs($output_log_file, log_name => $output_log_file);

# Do not kill the test, only return RC. There are still files to be cleaned up on deployer VM side.
Expand Down Expand Up @@ -754,6 +755,7 @@ sub sdaf_cleanup {
assert_script_run('cd'); # navigate out the directory you are about to delete
assert_script_run('rm -Rf ' . deployment_dir());
record_info('Cleanup files', join(' ', 'Deployment directory', deployment_dir, 'was deleted.'));
record_info('SDAF remover', 'Sdaf remover scripts finished');
}

=head2 sdaf_execute_playbook
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@ use Mojo::JSON qw(decode_json);
use Scalar::Util qw(looks_like_number);
use Carp qw(croak);
use mmapi qw(get_parents get_job_autoinst_vars get_children get_job_info get_current_job_id);
use sles4sap::azure_cli qw(az_resource_delete);
use Data::Dumper;

our @EXPORT = qw(
get_deployer_vm
get_deployer_vm_name
get_deployer_ip
check_ssh_availability
find_deployment_id
find_deployer_resources
destroy_deployer_vm
);

=head2 check_ssh_availability
Expand Down Expand Up @@ -110,9 +112,9 @@ sub get_deployer_ip {
return undef;
}

=head2 get_deployer_vm
=head2 get_deployer_vm_name
get_deployer_vm(deployer_resource_group=>$deployer_resource_group, deployment_id=>'123456');
get_deployer_vm_name(deployer_resource_group=>$deployer_resource_group, deployment_id=>'123456');
Returns deployer VM name which is tagged with B<deployment_id> specified in parameter. This means that the VM was used
to deploy the infrastructure under this ID and contains whole SDAF setup.
Expand All @@ -125,7 +127,7 @@ B<deployment_id>: Deployment ID
=cut

sub get_deployer_vm {
sub get_deployer_vm_name {
my (%args) = @_;
$args{deployer_resource_group} //= get_required_var('SDAF_DEPLOYER_RESOURCE_GROUP');
$args{deployment_id} //= find_deployment_id();
Expand Down Expand Up @@ -192,7 +194,7 @@ sub find_deployment_id {
my @ids_found;
for my $deployment_id (@check_list) {
my $vm_name =
get_deployer_vm(deployer_resource_group => $args{deployer_resource_group}, deployment_id => $deployment_id);
get_deployer_vm_name(deployer_resource_group => $args{deployer_resource_group}, deployment_id => $deployment_id);
push(@ids_found, $deployment_id) if $vm_name;
}
die "More than one deployment found.\nJobs IDs: " .
Expand Down Expand Up @@ -235,3 +237,44 @@ sub find_deployer_resources {

return \@resource_list;
}


=head2 destroy_deployer_vm

    destroy_deployer_vm([timeout=>800]);

Collects resource id of all resources belonging to the deployer VM and deletes them.
Cleanup deployer VM resources only, B<deployer resource group itself will stay intact>.

The delete command is attempted up to 3 times. After each attempt the resource list is collected again and
the function finishes as soon as no resources are found. It dies if resources are still present after the last attempt.
Returns early without deleting anything if no deployer VM resources are found.

B<timeout>: Timeout for destroy command. Default: 800

=cut

sub destroy_deployer_vm {
    my (%args) = @_;
    $args{timeout} //= '800';
    my $retries = 3;    # retry to delete 3x

    # Deployer VM is located in permanent deployer resource group. This RG **MUST STAY INTACT**
    my @resource_cleanup_list = @{find_deployer_resources(return_value => 'id')};
    unless (@resource_cleanup_list) {
        record_info('Deployer cleanup', 'No resources related to deployer VM found');
        return;
    }

    # Second lookup uses default return value, it is used for the log message only
    record_info('Deployer cleanup',
        "Following resources are being destroyed:\n" . join("\n", @{find_deployer_resources()}));

    for my $attempt (1 .. $retries) {
        record_info("Attempt #$attempt", "Deleting deployer VM resources, attempt $attempt of $retries");
        az_resource_delete(ids => join(' ', @resource_cleanup_list),
            resource_group => get_required_var('SDAF_DEPLOYER_RESOURCE_GROUP'), verbose => 'yes', timeout => $args{timeout});
        sleep 5;    # Just give things few secs to avoid command spamming.

        # Check if all resources were cleaned up.
        # Resource IDs must be requested again, the list is passed to 'ids' argument in next attempt.
        @resource_cleanup_list = @{find_deployer_resources(return_value => 'id')};
        last unless @resource_cleanup_list;
        die "Failed to clean up resources:\n" . join("\n", @resource_cleanup_list) if ($attempt == $retries);
    }
    record_info('Deployer cleanup', 'All resources destroyed');
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ L<https://github.com/Azure/sap-automation/blob/3c5d0d882f5892ae2159e262062e29c2b
sub convert_region_to_short {
my ($region) = @_;
croak 'Missing mandatory argument "$region"' unless $region;
croak "Abbreviation must use lowercase alphanumeric characters. Got: '$region'" unless $region =~ /^[a-z0-9]+$/;
croak "Region name must use lowercase alphanumeric characters. Got: '$region'" unless $region =~ /^[a-z0-9]+$/;

my @found_results = grep { $_ if $sdaf_region_matrix{$_} eq $region } keys(%sdaf_region_matrix);

Expand Down
36 changes: 27 additions & 9 deletions t/26_deployment_connector.t
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use Scalar::Util qw(reftype);
use List::Util qw(any none);
use sles4sap::sap_deployment_automation_framework::deployment_connector;

subtest '[get_deployer_vm] Test expected failures' => sub {
subtest '[get_deployer_vm_name] Test expected failures' => sub {
my $mock_function = Test::MockModule->new('sles4sap::sap_deployment_automation_framework::deployment_connector', no_auto => 1);
$mock_function->redefine(diag => sub { return; });
$mock_function->redefine(script_output => sub { return '
Expand All @@ -21,11 +21,11 @@ subtest '[get_deployer_vm] Test expected failures' => sub {
]
'; });

dies_ok { get_deployer_vm(deployer_resource_group => 'Char') } 'Croak with missing mandatory arg: deployment_id';
dies_ok { get_deployer_vm(deployer_resource_group => 'Char', deployment_id => '0079') } 'Die with multiple VMs tagged with same ID';
dies_ok { get_deployer_vm_name(deployer_resource_group => 'Char') } 'Croak with missing mandatory arg: deployment_id';
dies_ok { get_deployer_vm_name(deployer_resource_group => 'Char', deployment_id => '0079') } 'Die with multiple VMs tagged with same ID';
};

subtest '[get_deployer_vm] Check command composition' => sub {
subtest '[get_deployer_vm_name] Check command composition' => sub {
my $mock_function = Test::MockModule->new('sles4sap::sap_deployment_automation_framework::deployment_connector', no_auto => 1);
my @calls;
$mock_function->redefine(diag => sub { return; });
Expand All @@ -34,7 +34,7 @@ subtest '[get_deployer_vm] Check command composition' => sub {
]'
});

my $result = get_deployer_vm(deployer_resource_group => 'Char', deployment_id => '0079');
my $result = get_deployer_vm_name(deployer_resource_group => 'Char', deployment_id => '0079');
note("\n --> " . join("\n --> ", @calls));
ok((grep /az vm list/, @calls), 'Check main az command');
ok((grep /--resource-group Char/, @calls), 'Check --resource-group argument');
Expand All @@ -43,21 +43,21 @@ subtest '[get_deployer_vm] Check command composition' => sub {
is $result, '0079-Zaku_II', 'Return VM name';

$mock_function->redefine(script_output => sub { push(@calls, @_); return '[]' });
is get_deployer_vm(deployer_resource_group => 'Char', deployment_id => '0079'), undef, 'Return empty string if no VM found';
is get_deployer_vm_name(deployer_resource_group => 'Char', deployment_id => '0079'), undef, 'Return empty string if no VM found';
};

subtest '[find_deployment_id]' => sub {
    my $mock_function = Test::MockModule->new('sles4sap::sap_deployment_automation_framework::deployment_connector', no_auto => 1);
    $mock_function->redefine(get_current_job_id => sub { return '0079'; });
    $mock_function->redefine(get_parent_ids => sub { return ['0083', '0087']; });
    # Only the renamed function 'get_deployer_vm_name' is mocked.
    # Mock reports a VM only for deployment ID 0079 (current job ID)
    $mock_function->redefine(get_deployer_vm_name => sub { return '0079' if grep(/0079/, @_); });

    is find_deployment_id(deployer_resource_group => 'Char'), '0079', 'Current job ID belongs to VM';

    # Without current job ID and with no parent ID matching the mocked VM, nothing can be found
    $mock_function->redefine(get_current_job_id => sub { return; });
    is find_deployment_id(deployer_resource_group => 'Char'), undef, 'Return undef if no ID found';

    # Mock reports a VM only for deployment ID 0083 (one of the parent job IDs)
    $mock_function->redefine(get_deployer_vm_name => sub { return '0083' if grep(/0083/, @_); });
    is find_deployment_id(deployer_resource_group => 'Char'), '0083', 'Parent job ID belongs to VM';
};

Expand Down Expand Up @@ -182,8 +182,26 @@ subtest '[check_ssh_availability] Test command looping' => sub {

check_ssh_availability($ip_addr, wait_started => '1');
ok(($loop_count > 0), "Test retry loop with \$args{wait_started}. Loop count: $loop_count");

};

subtest '[destroy_deployer_vm]' => sub {
    my $mock_function = Test::MockModule->new('sles4sap::sap_deployment_automation_framework::deployment_connector', no_auto => 1);
    my $delete_calls = 0;
    $mock_function->redefine(record_info => sub { note(join(' ', 'RECORD_INFO -->', map { $_ // '' } @_)); });
    $mock_function->redefine(az_resource_delete => sub { $delete_calls++; return; });
    set_var('SDAF_DEPLOYER_RESOURCE_GROUP', 'Zabi');

    # No resources found: delete command must not be executed at all
    $mock_function->redefine(find_deployer_resources => sub { return []; });
    destroy_deployer_vm();
    is $delete_calls, 0, 'Do not trigger VM cleanup if VM not detected';

    # Mock reports resources until its third call, which returns empty array.
    # Function can therefore finish without dying only if it checks resources again after the delete command.
    my $loop_counter = 0;
    $mock_function->redefine(find_deployer_resources => sub {
            $loop_counter++;
            return [] if ($loop_counter == 3);
            return ['Gihren', 'Garma', 'Dozle'];
    });
    destroy_deployer_vm();
    ok(($delete_calls > 0), 'Trigger cleanup with resources detected');
    ok(($loop_counter >= 3), 'Check if "attempt" logic works');
    set_var('SDAF_DEPLOYER_RESOURCE_GROUP', undef);
};

done_testing;
40 changes: 10 additions & 30 deletions tests/sles4sap/sap_deployment_automation_framework/cleanup.pm
Original file line number Diff line number Diff line change
Expand Up @@ -9,48 +9,28 @@
# Post run hooks are generally disabled during normal module run so the infrastructure persists between test modules.
# Cleanup is triggered only with B<SDAF_DO_CLEANUP> set to true, which is done by scheduling this module at the end of test flow.

use parent 'sles4sap::sap_deployment_automation_framework::basetest';
use parent 'opensusebasetest';
use strict;
use testapi;
use warnings;
use serial_terminal qw(select_serial_terminal);
use sles4sap::sap_deployment_automation_framework::deployment
qw(serial_console_diag_banner
sdaf_cleanup
az_login
load_os_env_variables);
use sles4sap::sap_deployment_automation_framework::deployment_connector qw(find_deployer_resources);
use sles4sap::console_redirection qw(connect_target_to_serial disconnect_target_from_serial);
use sles4sap::azure_cli qw(az_resource_delete);
qw(serial_console_diag_banner);
use sles4sap::sap_deployment_automation_framework::basetest qw(full_cleanup);

# Returns test flags for this module as a hash reference. Only the 'fatal' flag is set.
sub test_flags {
    my %flags = (fatal => 1);
    return \%flags;
}

# Test module entry point. Selects serial terminal and runs the complete cleanup routine between
# 'Start' and 'End' diagnostic banners.
# All cleanup logic, including the check for "SDAF_RETAIN_DEPLOYMENT" variable, is handled by full_cleanup().
# Running SDAF remover and deleting deployer resources here as well would execute the cleanup twice.
sub run {
    select_serial_terminal;
    serial_console_diag_banner('Start: sdaf_cleanup.pm');
    full_cleanup();
    serial_console_diag_banner('End: sdaf_cleanup.pm');
}

# Post fail hook of the cleanup module. It only records the failure and does not call full_cleanup().
# NOTE(review): presumably this avoids repeating a cleanup which has just failed - confirm.
sub post_fail_hook {
    record_info('CLEANUP FAIL', 'Cleanup module failed. Check logs for resources which were not destroyed.');
    return;
}
1;
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,9 @@ use strict;
use warnings;
use testapi;
use sles4sap::sap_deployment_automation_framework::deployment
qw(serial_console_diag_banner
az_login
sdaf_prepare_private_key
);
qw(serial_console_diag_banner az_login sdaf_prepare_private_key);
use sles4sap::sap_deployment_automation_framework::deployment_connector
qw(get_deployer_vm
get_deployer_ip
);
qw(get_deployer_vm_name get_deployer_ip find_deployment_id);
use serial_terminal qw(select_serial_terminal);

sub test_flags {
Expand All @@ -28,7 +23,7 @@ sub run {
serial_console_diag_banner('Module sdaf_redirect_console_to_deployer.pm : start');
az_login();

my $deployer_vm_name = get_deployer_vm;
my $deployer_vm_name = get_deployer_vm_name(deployment_id => find_deployment_id());
# VM can be created by scheduling 'tests/sles4sap/sap_deployment_automation_framework/create_deployer_vm.pm'
die 'Deployer VM not found. Check if VM exists.' unless $deployer_vm_name;
record_info('VM found', "Deployer VM found: $deployer_vm_name");
Expand Down

0 comments on commit c0e2d46

Please sign in to comment.