Skip to content

Commit

Permalink
node: save ts of last error
Browse files (browse the repository at this point in the history)
  • Loading branch information
sni committed Oct 16, 2024
1 parent f422db7 commit b6b541a
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 19 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -113,6 +113,7 @@ sub get_server {
$facts->{$key} = 0;
if($data->{'rc'} != 0) {
$facts->{'last_error'} = $data->{'stdout'}.$data->{'stderr'};
$facts->{'last_error_ts'} = time();
}
$facts->{'last_job'} = $job;
$save_required = 1;
Expand Down Expand Up @@ -140,9 +141,7 @@ sub get_server {
$facts->{'gathering'} = 1;
}

for my $errKey (qw/last_error last_facts_error/) {
$facts->{$errKey} =~ s/\s+at\s+.*HTTP\.pm\s+line\s+\d+\.//gmx if $facts->{$errKey};
}
$facts->{'last_error'} =~ s/\s+at\s+.*(Utils|HTTP)\.pm\s+line\s+\d+\.//gmx if $facts->{'last_error'};

# gather available logs
my @logs = glob($c->config->{'var_path'}.'/node_control/'.$peer->{'key'}.'_*.log');
Expand Down Expand Up @@ -190,7 +189,7 @@ sub get_server {
omd_disk_free => $facts->{'omd_disk_free'} // '',
omd_available_versions => $facts->{'omd_packages_available'} // [],
last_error => $facts->{'last_error'} // '',
last_facts_error => $facts->{'last_facts_error'} // '',
last_error_ts => $facts->{'last_error_ts'} // '',
last_job => $facts->{'last_job'} // '',
logs => $logs,
facts => $facts || {},
Expand Down Expand Up @@ -241,8 +240,7 @@ sub ansible_get_facts {
};
my $err = $@;
if($err) {
$err =~ s/\s+at\s+.*?\.pm\ line\ \d+\.$//gmx;
$f = Thruk::Utils::IO::json_lock_patch($file, { 'gathering' => 0, 'last_facts_error' => $err }, { pretty => 1, allow_empty => 1 });
$f = Thruk::Utils::IO::json_lock_patch($file, { 'gathering' => 0, 'last_error' => $err, 'last_error_ts' => time() }, { pretty => 1, allow_empty => 1 });
}
return($f);
}
Expand Down Expand Up @@ -271,8 +269,7 @@ sub update_runtime_data {
};
my $err = $@;
if($err) {
$err =~ s/\s+at\s+.*?\.pm\ line\ \d+\.$//gmx;
$f = Thruk::Utils::IO::json_lock_patch($file, { 'gathering' => 0, 'last_error' => $err }, { pretty => 1, allow_empty => 1 });
$f = Thruk::Utils::IO::json_lock_patch($file, { 'gathering' => 0, 'last_error' => $err, 'last_error_ts' => time() }, { pretty => 1, allow_empty => 1 });
} else {
$f = Thruk::Utils::IO::json_lock_patch($file, { 'gathering' => 0, 'last_error' => '', %{$runtime} }, { pretty => 1, allow_empty => 1 });
}
Expand All @@ -292,7 +289,7 @@ sub _ansible_get_facts {

my $prev = Thruk::Utils::IO::json_lock_patch($file, { 'gathering' => $$ }, { pretty => 1, allow_empty => 1 });
$prev->{'gathering'} = 0;
$prev->{'last_facts_error'} = "";
$prev->{'last_error'} = "";

# available subsets are listed here:
# https://docs.ansible.com/ansible/latest/collections/ansible/builtin/setup_module.html#parameter-gather_subset
Expand Down Expand Up @@ -751,7 +748,7 @@ sub os_update {
die("starting job failed") unless $job;
};
if($@) {
$f = Thruk::Utils::IO::json_lock_patch($file, { 'os_updating' => 0, 'last_error' => $@ }, { pretty => 1, allow_empty => 1 });
$f = Thruk::Utils::IO::json_lock_patch($file, { 'os_updating' => 0, 'last_error' => $@, 'last_error_ts' => time() }, { pretty => 1, allow_empty => 1 });
return;
}

Expand Down Expand Up @@ -793,7 +790,7 @@ sub os_sec_update {
die("starting job failed") unless $job;
};
if($@) {
$f = Thruk::Utils::IO::json_lock_patch($file, { 'os_sec_updating' => 0, 'last_error' => $@ }, { pretty => 1, allow_empty => 1 });
$f = Thruk::Utils::IO::json_lock_patch($file, { 'os_sec_updating' => 0, 'last_error' => $@, 'last_error_ts' => time() }, { pretty => 1, allow_empty => 1 });
return;
}

Expand Down Expand Up @@ -1221,7 +1218,6 @@ sub _set_job_errored {

chomp($err);
_debug($err);
$err =~ s/\s+at\s+.*?\.pm\ line\ \d+\.$//gmx;

print "*** [ERROR] $err\n";

Expand All @@ -1232,7 +1228,8 @@ sub _set_job_errored {
$err = $cur->{'last_error'}."\n".$err;
}
my $data = {
'last_error' => $err,
'last_error' => $err,
'last_error_ts' => time()
};
$data->{$type} = 0;
$data->{$type."_failed"} = "1";
Expand Down Expand Up @@ -1290,6 +1287,7 @@ sub _die_connection_error {
my($peer, $http_err, $ssh_err) = @_;

if(($http_err//'') =~ m/^OMD:/mx) {
die($http_err."\nssh failed: ".$ssh_err) if $ssh_err;
die($http_err);
}

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -113,8 +113,8 @@ sub cmd {
$facts = Thruk::NodeControl::Utils::ansible_get_facts($c, $peer, 1);
}
}
if(!$facts || $facts->{'last_error'} || $facts->{'last_facts_error'}) {
my $err = sprintf("%s updating %s failed: %s\n", $peer->{'name'}, $mode, ($facts->{'last_facts_error'}||$facts->{'last_error'}//'unknown error'));
if(!$facts || $facts->{'last_error'}) {
my $err = sprintf("%s updating %s failed: %s\n", $peer->{'name'}, $mode, ($facts->{'last_error'}//'unknown error'));
if($ENV{'THRUK_CRON'}) {
_warn($err); # don't fill the log with errors from cronjobs
} else {
Expand Down
10 changes: 6 additions & 4 deletions plugins/plugins-available/node-control/templates/node_control.tt
Original file line number | Diff line number | Diff line change
Expand Up @@ -114,8 +114,6 @@
error_started = 0;
IF s.last_error;
error_lines = s.last_error.split("\n");
ELSIF s.last_facts_error;
error_lines = s.last_facts_error.split("\n");
END %]
<tr class="js-tabs" id="node_row_[% s.peer_key | html %]">
<td class="align-top js-node-row">[% IF s.section != "Default" && s.section != "" && s.section != last_section; s.section | html; END; last_section = s.section; %]</td>
Expand Down Expand Up @@ -143,7 +141,7 @@
</a>
[% END %]
</td>
[% IF error_lines.size > 0 || s.last_facts_error && !s.omd_site && !s.omd_version %]
[% IF error_lines.size > 0 %]
<td colspan="[% IF show_os_updates %]7[% ELSE %]6[% END %]" class="align-top textALERT whitespace-pre truncate overflow-hidden [% IF error_lines.size > 1 %]clickable[% END %]" style="max-width: 60vw;" [% IF error_lines.size > 1 %]onclick="toggleElement('errors_[% s.peer_key | html %]'); reapplyAllStripes(); return false;"[% END %]>[% error_lines.0 | html; error_started = 1 %]</td>
[% ELSE %]
[% IF s.omd_version && omd_default_version != s.omd_version %]
Expand Down Expand Up @@ -312,7 +310,11 @@
<tr class="hidden" id="errors_[% s.peer_key | html %]">
<td></td>
<td></td>
<td colspan="10" class="textALERT whitespace-pre truncate overflow-hidden" style="max-width: 60vw;">[% error_lines.join("\n") %]</td>
<td colspan="10" class="textALERT whitespace-pre truncate overflow-hidden" style="max-width: 60vw;">[%
ts = "[" _ date_format(c, s.last_error_ts) _ "] ";
ts;
error_lines.join("\n" _ ts)
%]</td>
</tr>
[% END %]
[% END %]
Expand Down

0 comments on commit b6b541a

Please sign in to comment.