From b3f7d7b4eba7de5f7ce07bc4cd685fde05232174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Marqu=C3=A9s?= Date: Tue, 14 Oct 2014 12:38:32 -0300 Subject: [PATCH 01/22] Add support for application_name filter on long running transactions and long idle in transaction. Good for filtering applications like pg_dump, but also others without having to run them with a specific user. --- check_postgres.pl | 57 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 3 deletions(-) diff --git a/check_postgres.pl b/check_postgres.pl index a7a2a767..fb585525 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -920,7 +920,8 @@ package check_postgres; ## These options are multiples ('@s') for my $arr (qw/include exclude includeuser excludeuser host port - dbuser dbname dbpass dbservice schema/) { + dbuser dbname dbpass dbservice schema excludeapp + includeapp/) { next if $name ne $arr and $name ne "${arr}2"; push @{$tempopt{$name}} => $value; ## Don't set below as a normal value @@ -961,6 +962,8 @@ package check_postgres; 'exclude=s@', 'includeuser=s@', 'excludeuser=s@', + 'excludeapp=s@', + 'includeapp=s@', 'host|dbhost|H|dbhost1|H1=s@', 'port|dbport|p|port1|dbport1|p1=s@', @@ -1224,6 +1227,8 @@ package check_postgres; --exclude=name(s) items to specifically exclude (e.g. 
tables), depends on the action --includeuser=include objects owned by certain users --excludeuser=exclude objects owned by certain users + --excludeapp=exclude objects by application_name + --includeapp=include objects by application_name Other options: --assume-standby-mode assume that server in continious WAL recovery mode @@ -1871,6 +1876,52 @@ sub finishup { } } +our $APPWHERECLAUSE = ''; +if ($opt{includeapp}) { + my %applist; + for my $app (@{$opt{includeapp}}) { + for my $a2 (split /,/ => $app) { + $applist{$a2}++; + } + } + my $safeapp; + if (1 == keys %applist) { + ($safeapp = each %applist) =~ s/'/''/g; + $APPWHERECLAUSE = " AND application_name = '$safeapp'"; + } + else { + $APPWHERECLAUSE = ' AND application_name IN ('; + for my $app (sort keys %applist) { + ($safeapp = $app) =~ s/'/''/g; + $APPWHERECLAUSE .= "'$safeapp',"; + } + chop $APPWHERECLAUSE; + $APPWHERECLAUSE .= ')'; + } +} +elsif ($opt{excludeapp}) { + my %applist; + for my $app (@{$opt{excludeapp}}) { + for my $a2 (split /,/ => $app) { + $applist{$a2}++; + } + } + my $safeapp; + if (1 == keys %applist) { + ($safeapp = each %applist) =~ s/'/''/g; + $APPWHERECLAUSE = " AND application_name <> '$safeapp'"; + } + else { + $APPWHERECLAUSE = ' AND application_name NOT IN ('; + for my $app (sort keys %applist) { + ($safeapp = $app) =~ s/'/''/g; + $APPWHERECLAUSE .= "'$safeapp',"; + } + chop $APPWHERECLAUSE; + $APPWHERECLAUSE .= ')'; + } +} + ## Check number of connections, compare to max_connections check_backends() if $action eq 'backends'; @@ -7599,7 +7650,7 @@ sub check_txn_idle { $SQL = q{SELECT datname, datid, procpid AS pid, usename, client_addr, xact_start, current_query AS current_query, '' AS state, }. q{CASE WHEN client_port < 0 THEN 0 ELSE client_port END AS client_port, }. qq{COALESCE(ROUND(EXTRACT(epoch FROM now()-$start)),0) AS seconds }. - qq{FROM pg_stat_activity WHERE ($clause)$USERWHERECLAUSE }. + qq{FROM pg_stat_activity WHERE ($clause)$USERWHERECLAUSE $APPWHERECLAUSE }. 
q{ORDER BY xact_start, query_start, procpid DESC}; ## Craft an alternate version for old servers that do not have the xact_start column: ($SQL2 = $SQL) =~ s/xact_start/query_start AS xact_start/; @@ -7609,7 +7660,7 @@ sub check_txn_idle { $SQL2 = $SQL = q{SELECT datname, datid, procpid AS pid, usename, client_addr, current_query AS current_query, '' AS state, }. q{CASE WHEN client_port < 0 THEN 0 ELSE client_port END AS client_port, }. qq{COALESCE(ROUND(EXTRACT(epoch FROM now()-$start)),0) AS seconds }. - qq{FROM pg_stat_activity WHERE ($clause)$USERWHERECLAUSE }. + qq{FROM pg_stat_activity WHERE ($clause)$USERWHERECLAUSE $APPWHERECLAUSE }. q{ORDER BY query_start, procpid DESC}; } From 099a599e68459c16a7447b95bb10bb55384217c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Marqu=C3=A9s?= Date: Tue, 14 Oct 2014 12:39:59 -0300 Subject: [PATCH 02/22] Better check for insufficient privileges and configuration options that haven't been set and are needed. --- check_postgres.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/check_postgres.pl b/check_postgres.pl index fb585525..d59ef537 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -7703,13 +7703,13 @@ sub check_txn_idle { my $st = $r->{state} || ''; ## Return unknown if we cannot see because we are a non-superuser - if ($cq =~ /insufficient/o) { + if ($cq =~ /^insufficient/o) { add_unknown msg('psa-nosuper'); return; } ## Return unknown if stats_command_string / track_activities is off - if ($cq =~ /disabled/o or $cq =~ //) { + if ($cq =~ /^disabled/o or $cq =~ /^/) { add_unknown msg('psa-disabled'); return; } From d039829f0bb9c43853c856cfe443a2f8e0c3f81f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Marqu=C3=A9s?= Date: Tue, 2 Jun 2015 16:53:22 -0300 Subject: [PATCH 03/22] When running --action bloat, index names are not fully qualified so it's hard or impossible to know which index is bloated if there are indexes with the same name in different schemas. 
Here we add the schema to the bloat message when finding bloat in indexes. --- check_postgres.pl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/check_postgres.pl b/check_postgres.pl index d59ef537..a32fba35 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -97,7 +97,7 @@ package check_postgres; 'backends-oknone' => q{No connections}, 'backends-po' => q{sorry, too many clients already}, 'backends-users' => q{$1 for number of users must be a number or percentage}, - 'bloat-index' => q{(db $1) index $2 rows:$3 pages:$4 shouldbe:$5 ($6X) wasted bytes:$7 ($8)}, + 'bloat-index' => q{(db $1) index $2.$3 rows:$4 pages:$5 shouldbe:$6 ($7X) wasted bytes:$8 ($9)}, 'bloat-nomin' => q{no relations meet the minimum bloat criteria}, 'bloat-table' => q{(db $1) table $2.$3 rows:$4 pages:$5 shouldbe:$6 ($7X) wasted size:$8 ($9)}, 'bug-report' => q{Please report these details to check_postgres@bucardo.org:}, @@ -3755,14 +3755,14 @@ sub check_bloat { ## Now the index, if it exists if ($index ne '?') { - my $nicename = perfname($index); + my $nicename = perfname("$schema.$index"); $perf{$iwb}{$nicename}++; - my $msg = msg('bloat-index', $dbname, $index, $irows, $ipages, $iotta, $ibloat, $iwb, $iws); + my $msg = msg('bloat-index', $dbname, $schema, $index, $irows, $ipages, $iotta, $ibloat, $iwb, $iws); my $ok = 1; my $iperbloat = $ibloat * 100; if ($MRTG) { - $stats{index}{"DB=$dbname INDEX=$index"} = [$iwb, $ibloat]; + $stats{index}{"DB=$dbname INDEX=$schema.$index"} = [$iwb, $ibloat]; next; } if ($critical->($iwb, $iperbloat)) { From 380064342ec35f5896188655369f1bd11716b4bc Mon Sep 17 00:00:00 2001 From: Greg Sabino Mullane Date: Tue, 23 Jun 2015 09:09:07 -0400 Subject: [PATCH 04/22] Version bump. 
--- check_postgres.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/check_postgres.pl b/check_postgres.pl index a32fba35..963a7f28 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -32,7 +32,7 @@ package check_postgres; binmode STDOUT, ':encoding(UTF-8)'; -our $VERSION = '2.21.0'; +our $VERSION = '2.21.1'; use vars qw/ %opt $PGBINDIR $PSQL $res $COM $SQL $db /; From a82fb8671dd4a5a6948939c33d2675d912acf137 Mon Sep 17 00:00:00 2001 From: Greg Sabino Mullane Date: Tue, 23 Jun 2015 09:16:26 -0400 Subject: [PATCH 05/22] Update release notes a bit --- check_postgres.pl | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/check_postgres.pl b/check_postgres.pl index 963a7f28..9d69e73b 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -8025,7 +8025,7 @@ =head1 NAME B - a Postgres monitoring script for Nagios, MRTG, Cacti, and others -This documents describes check_postgres.pl version 2.21.0 +This documents describes check_postgres.pl version 2.21.1 =head1 SYNOPSIS @@ -9883,20 +9883,44 @@ =head1 HISTORY Add explicit ORDER BY to the slony_status check to get the most lagged server. (Jeff Frost) - Declare POD encoding to be utf8. (Christoph Berg) + Change the way tables are quoted in replicate_row. + (Glyn Astill) - Query all sequences per DB in parallel for action=sequence. (Christoph Berg) + Improved multi-slave support in replicate_row. + (Andrew Yochum) + + Add xact timestamp support to hot_standby_delay. + Allow the hot_standby_delay check to accept xlog byte position or + timestamp lag intervals as thresholds, or even both at the same time. + (Josh Williams) + + Fix and extend hot_standby_delay documentation + (Michael Renner) + + Don't swallow space before the -c flag when reporting errors + (Jeff Janes) + + Show actual long-running query in query_time output + (Peter Eisentraut) + + Declare POD encoding to be utf8. 
+ (Christoph Berg) + + Query all sequences per DB in parallel for action=sequence. + (Christoph Berg) =item B September 24, 2013 Fix issue with SQL steps in check_pgagent_jobs for sql steps which perform deletes (Rob Emery via github pull) - Install man page in section 1. (Peter Eisentraut, bug 53, github issue 26) + Install man page in section 1. + (Peter Eisentraut, bug 53, github issue 26) Order lock types in check_locks output to make the ordering predictable; setting SKIP_NETWORK_TESTS will skip the new_version tests; other minor test - suite fixes. (Christoph Berg) + suite fixes. + (Christoph Berg) Fix same_schema check on 9.3 by ignoring relminmxid differences in pg_class (Christoph Berg) From 483f492ebd3a399b24fe2dd8879b9e21eab215df Mon Sep 17 00:00:00 2001 From: Christoph Berg Date: Tue, 23 Jun 2015 16:26:10 +0200 Subject: [PATCH 06/22] Fix t/02_sequence.t for PG 9.0/1 --- t/02_sequence.t | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/02_sequence.t b/t/02_sequence.t index efce3a02..1111d895 100644 --- a/t/02_sequence.t +++ b/t/02_sequence.t @@ -118,7 +118,7 @@ if ($ver >= 90200) { like ($cp->run('--exclude=sequence_test_id_seq'), qr{WARNING:.+public.sequence_test_smallid_seq=92% \(calls left=2767\)}, $t); } else { SKIP: { - skip '"smallserial" needs PostgreSQL 9.2 or later', 2; + skip '"smallserial" needs PostgreSQL 9.2 or later', 1; } } From 989479fc4116710a0a7034371bc8b6e5b1d4dd85 Mon Sep 17 00:00:00 2001 From: Christoph Berg Date: Tue, 23 Jun 2015 16:35:31 +0200 Subject: [PATCH 07/22] Fix txn_time regression test for 9.0/9.1 Newer versions will show the last or current query here, older versions will just show "<IDLE> in transaction" if there is currently no query running. 
--- t/02_txn_time.t | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/02_txn_time.t b/t/02_txn_time.t index 8be2fa18..d743fe1f 100644 --- a/t/02_txn_time.t +++ b/t/02_txn_time.t @@ -76,7 +76,8 @@ sleep(1); like ($cp->run(q{-w 0}), qr{longest txn: 1s}, $t); $t .= ' (MRTG)'; -like ($cp->run(q{--output=mrtg -w 0}), qr{\d+\n0\n\nPID:\d+ database:$dbname username:\w+ query:SELECT 1\n}, $t); +my $query_patten = ($ver >= 90200) ? "SELECT 1" : " in transaction"; +like ($cp->run(q{--output=mrtg -w 0}), qr{\d+\n0\n\nPID:\d+ database:$dbname username:\w+ query:$query_patten\n}, $t); $idle_dbh->commit; From f0e1e80929447b322bc45465075db284813b8508 Mon Sep 17 00:00:00 2001 From: Greg Sabino Mullane Date: Fri, 26 Jun 2015 08:52:32 -0400 Subject: [PATCH 08/22] Rearrange recent changes in rough priority order --- check_postgres.pl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/check_postgres.pl b/check_postgres.pl index 9d69e73b..beb06a3e 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -9877,38 +9877,38 @@ =head1 HISTORY =item B + Add xact timestamp support to hot_standby_delay. + Allow the hot_standby_delay check to accept xlog byte position or + timestamp lag intervals as thresholds, or even both at the same time. + (Josh Williams) + + Query all sequences per DB in parallel for action=sequence. + (Christoph Berg) + Fix bloat check to use correct SQL depending on the server version. (Adrian Vondendriesch) + Show actual long-running query in query_time output + (Peter Eisentraut) + Add explicit ORDER BY to the slony_status check to get the most lagged server. (Jeff Frost) - Change the way tables are quoted in replicate_row. - (Glyn Astill) - Improved multi-slave support in replicate_row. (Andrew Yochum) - Add xact timestamp support to hot_standby_delay. - Allow the hot_standby_delay check to accept xlog byte position or - timestamp lag intervals as thresholds, or even both at the same time. 
- (Josh Williams) - - Fix and extend hot_standby_delay documentation - (Michael Renner) + Change the way tables are quoted in replicate_row. + (Glyn Astill) Don't swallow space before the -c flag when reporting errors (Jeff Janes) - Show actual long-running query in query_time output - (Peter Eisentraut) + Fix and extend hot_standby_delay documentation + (Michael Renner) Declare POD encoding to be utf8. (Christoph Berg) - Query all sequences per DB in parallel for action=sequence. - (Christoph Berg) - =item B September 24, 2013 Fix issue with SQL steps in check_pgagent_jobs for sql steps which perform deletes From 0752e8d3ee9b8cf585ad310c722923460fb4dd34 Mon Sep 17 00:00:00 2001 From: Greg Sabino Mullane Date: Fri, 26 Jun 2015 09:18:02 -0400 Subject: [PATCH 09/22] Spelling --- check_postgres.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/check_postgres.pl b/check_postgres.pl index beb06a3e..b52f1f15 100755 --- a/check_postgres.pl +++ b/check_postgres.pl @@ -8908,7 +8908,7 @@ =head2 B to it. The slave server must be in hot_standby (e.g. read only) mode, therefore the minimum version to use this action is Postgres 9.0. The I<--warning> and I<--critical> options are the delta between the xlog locations. Since these values are byte offsets in the WAL they should match the expected transaction volume -of your application to prevent false postives or negatives. +of your application to prevent false positives or negatives. The first "--dbname", "--host", and "--port", etc. options are considered the master; the second belongs to the slave. @@ -8920,7 +8920,7 @@ =head2 B form 'I and I