From dc1827d62bd03317bba0bc573f8b8e4a58e3d5c6 Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:20:17 +0100 Subject: [PATCH 01/11] Switched to primary/secondary in comments. --- CHANGELOG.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f4e17b..27c4ba5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,7 +43,7 @@ Pacemaker 1.1.13 using a corosync 2.x stack. Changelog since 2.1: * new: support PostgreSQL 10 -* new: add the maxlag parameter to exclude lagging slaves from promotion, Thomas Reiss +* new: add the maxlag parameter to exclude lagging secondaries from promotion, Thomas Reiss * new: support for multiple pgsqlms resources in the same cluster * new: provide comprehensive error messages to crm_mon * fix: follow the resource agent man page naming policy and section @@ -93,7 +93,7 @@ Pacemaker 1.1.13 using a corosync 2.x stack. * fix: check on application_name in validate_all * change: do not start standby with a master score of 1 * change: choose the clone to promote when no master score exist -* new: detect and deal master/slave recovery transition +* new: detect and deal primary/secondary recovery transition * new: detect and enforce reliability of a switchover * new: set next best secondaries base on their lag * misc: code cleanup and refactoring @@ -126,7 +126,7 @@ Release date: 2016-04-27 * fix: OCF tests when PostgreSQL does not listen in /tmp * change: do not update score outside of a monitor action (gh #18) * new: add parameter 'start_opts', usefull for debian and derivated (gh #11) -* new: add specific timeout for master and slave roles in meta-data (gh #14) +* new: add specific timeout for primary and secondary roles in meta-data (gh #14) * new: add debian packaging related files @@ -136,4 +136,3 @@ Release date: 2016-04-27 Release date: 2016-03-02 * First public release - From 936f4eef51f003de3d1ed57ad5b3602b5f23cc52 Mon Sep 17 00:00:00 2001 From: Stefanie 
Janine Stoelting Date: Tue, 12 Jan 2021 14:20:28 +0100 Subject: [PATCH 02/11] Switched to primary/secondary in comments. --- README.md | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index b7cbaed..e81fb87 100644 --- a/README.md +++ b/README.md @@ -24,11 +24,11 @@ yet powerful. Once your PostgreSQL cluster built using internal streaming replication, PAF is able to expose to Pacemaker what is the current status of the PostgreSQL -instance on each node: master, slave, stopped, catching up, etc. Should a -failure occurs on the master, Pacemaker will try to recover it by default. -Should the failure be non-recoverable, PAF allows the slaves to be able to -elect the best of them (the closest one to the old master) and promote it as -the new master. All of this thanks to the robust, feature-full and most +instance on each node: primary, secondary, stopped, catching up, etc. Should a +failure occurs on the primary, Pacemaker will try to recover it by default. +Should the failure be non-recoverable, PAF allows the secondaries to be able to +elect the best of them (the closest one to the old primary) and promote it as +the new primary. All of this thanks to the robust, feature-full and most importantly experienced project: Pacemaker. For information about how to install this agent, see `INSTALL.md`. @@ -40,10 +40,10 @@ CentOS 6 and 7 in various scenario. PAF has been written to give to the administrator the maximum control over their PostgreSQL configuration and architecture. Thus, you are 100% -responsible for the master/slave creations and their setup. The agent +responsible for the primary/secondary creations and their setup. The agent will NOT edit your setup. 
It only requires you to follow these pre-requisites: - * slave __must__ be in hot_standby (accept read-only connections) ; + * secondary __must__ be in hot_standby (accept read-only connections) ; * the following parameters __must__ be configured in the appropriate place : * `standby_mode = on` (for PostgreSQL 11 and before) * `recovery_target_timeline = 'latest'` @@ -66,7 +66,7 @@ can set: * `pghost`: the socket directory or IP address to use to connect to the local instance (default: `/tmp` or `/var/run/postgresql` for DEBIAN) * `pgport`: the port to connect to the local instance (default: `5432`) - * `recovery_template`: __only__ for PostgreSQL 11 and before. The local + * `recovery_template`: __only__ for PostgreSQL 11 and before. The local template that will be copied as the `PGDATA/recovery.conf` file. This file must not exist on any node for PostgreSQL 12 and after. (default: `$PGDATA/recovery.conf.pcmk`) @@ -76,11 +76,10 @@ can set: `-c config_file=/etc/postgresql/9.3/main/postgresql.conf` * `system_user`: the system owner of your instance's process (default: `postgres`) - * `maxlag`: maximum lag allowed on a standby before we set a negative master + * `maxlag`: maximum lag allowed on a standby before we set a negative primary score on it. The calculation is based on the difference between the current - xlog location on the master and the write location on the standby. + xlog location on the primary and the write location on the standby. (default: 0, which disables this feature) For a demonstration about how to setup a cluster, see [http://clusterlabs.github.io/PAF/documentation.html](http://clusterlabs.github.io/PAF/documentation.html). - From 2fa6da41e76179f26ab1463d58e093e6b51ee9a0 Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:21:12 +0100 Subject: [PATCH 03/11] Switched to primary/secondary in comments. 
--- debian/changelog | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/debian/changelog b/debian/changelog index 036a29c..8a6b0e0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -123,7 +123,7 @@ resource-agents-paf (2.0~beta1-1) unstable; urgency=low * 2.0_beta1 beta release * fix: do not use crm_node --partition to discover resources * fix: unknown argument --query when calling crm_master - * fix: perl warning when master score has never been set on the master + * fix: perl warning when master score has never been set on the primary * fix: remove wrong info message during post-promote notify * fix: race condition when setting attributes during actions * fix: bug where pgport and pghost where ignored in _query @@ -133,7 +133,7 @@ resource-agents-paf (2.0~beta1-1) unstable; urgency=low * fix: check on application_name in validate_all * change: do not start standby with a master score of 1 * change: choose the clone to promote when no master score exist - * new: detect and deal master/slave recovery transition + * new: detect and deal primary/secondary recovery transition * new: detect and enforce reliability of a switchover * new: set next best secondaries base on their lag * misc: code cleanup and refactoring @@ -144,7 +144,7 @@ resource-agents-paf (2.0~beta1-1) unstable; urgency=low resource-agents-paf (1.0.1-1) unstable; urgency=low * 1.0.1 minor release - * fix: forbid the master to decrease its own score (gh #19) + * fix: forbid the primary to decrease its own score (gh #19) * fix: bad LSN decimal converstion (gh #20) * fix: support PostgreSQL 9.5 controldata output (gh #12) * fix: set group id of given system_user before executing commands (gh #11) @@ -153,7 +153,7 @@ resource-agents-paf (1.0.1-1) unstable; urgency=low * fix: OCF tests when PostgreSQL does not listen in /tmp * change: do not update score outside of a monitor action (gh #18) * new: add parameter 'start_opts', usefull for debian and derivated (gh #11) - * new: add 
specific timeout for master and slave roles in meta-data (gh #14) + * new: add specific timeout for primary and secondary roles in meta-data (gh #14) * new: add debian packaging related files -- Jehan-Guillaume (ioguix) de Rorthais Wed, 27 Apr 2016 13:22:50 +0200 From ee2113f418fc35c0dc16c699c55c06507ea059f2 Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:21:20 +0100 Subject: [PATCH 04/11] Switched to primary/secondary in comments. --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index c4e4c67..74ba2e5 100644 --- a/debian/control +++ b/debian/control @@ -17,4 +17,4 @@ Description: PostgreSQL resource agent for Pacemaker the Pacemaker administration and the PostgreSQL one, to keep things simple, documented and yet powerful. . - It only supports multi-state (Master-Slave) PostgreSQL clusters. + It only supports multi-state (Primary-Secondary) PostgreSQL clusters. From 1628229d2809e38d94be7476c65e00688782e4eb Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:22:25 +0100 Subject: [PATCH 05/11] Switched to primary/secondary in comments. --- extra/vagrant/2nodes-qdevice-vip/provision/pgsql.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extra/vagrant/2nodes-qdevice-vip/provision/pgsql.bash b/extra/vagrant/2nodes-qdevice-vip/provision/pgsql.bash index b4720e9..22021f3 100755 --- a/extra/vagrant/2nodes-qdevice-vip/provision/pgsql.bash +++ b/extra/vagrant/2nodes-qdevice-vip/provision/pgsql.bash @@ -83,14 +83,14 @@ if [ "$NODENAME" == "$PRIM_NODE" ]; then chown -R postgres:postgres "$PGDATA" - # create master ip + # create primary ip ip -o addr show to "${PRIM_IP}" | if ! 
grep -q "${PRIM_IP}" then DEV=$(ip route show to "${PRIM_IP}/24"|grep -Eo 'dev \w+') ip addr add "${PRIM_IP}/24" dev "${DEV/dev }" fi - # restart master pgsql + # restart primary pgsql systemctl --quiet start "postgresql-${PGVER}" exit From ffe60cad8326f9f85b670944493cecca0b5121e4 Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:22:59 +0100 Subject: [PATCH 06/11] Switched to primary/secondary in comments. --- extra/vagrant/3nodes-haproxy/provision/pacemaker.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/vagrant/3nodes-haproxy/provision/pacemaker.bash b/extra/vagrant/3nodes-haproxy/provision/pacemaker.bash index 4248f2b..2363a02 100755 --- a/extra/vagrant/3nodes-haproxy/provision/pacemaker.bash +++ b/extra/vagrant/3nodes-haproxy/provision/pacemaker.bash @@ -85,7 +85,7 @@ PGSQLD_RSC_OPTS=( # NB: pcs 0.10.2 doesn't support to set the id of the clone XML node # the id is built from the rsc id to clone using "-clone" # As a matter of cohesion and code simplicity, we use the same -# convention to create the master resource with pcs 0.9.x for +# convention to create the primary resource with pcs 0.9.x for # Pacemaker 1.1 if [ "$PCMK_VER" -ge 2 ]; then PGSQLD_RSC_OPTS+=( "promotable" "notify=true" ) From 45bdfb2c3a30c34feefe0cac58df20d59b993838 Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:23:24 +0100 Subject: [PATCH 07/11] Switched to primary/secondary in comments. 
--- extra/vagrant/3nodes-haproxy/provision/pgsql.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/vagrant/3nodes-haproxy/provision/pgsql.bash b/extra/vagrant/3nodes-haproxy/provision/pgsql.bash index be7c36a..6750402 100755 --- a/extra/vagrant/3nodes-haproxy/provision/pgsql.bash +++ b/extra/vagrant/3nodes-haproxy/provision/pgsql.bash @@ -83,7 +83,7 @@ if [ "$NODENAME" == "$PRIM_NODE" ]; then chown -R postgres:postgres "$PGDATA" - # restart master pgsql + # restart primary pgsql systemctl --quiet start "postgresql-${PGVER}" exit From d4ac0075e7e52021d8bfccf9898c734ddc7a4a26 Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:23:54 +0100 Subject: [PATCH 08/11] Switched to primary/secondary in comments. --- extra/vagrant/3nodes-vip/provision/pgsql.bash | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/vagrant/3nodes-vip/provision/pgsql.bash b/extra/vagrant/3nodes-vip/provision/pgsql.bash index b4720e9..1ca069a 100755 --- a/extra/vagrant/3nodes-vip/provision/pgsql.bash +++ b/extra/vagrant/3nodes-vip/provision/pgsql.bash @@ -83,7 +83,7 @@ if [ "$NODENAME" == "$PRIM_NODE" ]; then chown -R postgres:postgres "$PGDATA" - # create master ip + # create primary ip ip -o addr show to "${PRIM_IP}" | if ! grep -q "${PRIM_IP}" then DEV=$(ip route show to "${PRIM_IP}/24"|grep -Eo 'dev \w+') From 3ba694f903121333fcf276cd63acffb80dff8e92 Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:24:11 +0100 Subject: [PATCH 09/11] Switched to primary/secondary in comments. 
--- lib/OCF_Functions.pm | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/OCF_Functions.pm b/lib/OCF_Functions.pm index 895e7cc..723b8e4 100755 --- a/lib/OCF_Functions.pm +++ b/lib/OCF_Functions.pm @@ -12,7 +12,7 @@ OCF_Functions - helper subroutines for OCF agent use FindBin; use lib "$FindBin::RealBin/../../lib/heartbeat/"; - + use OCF_Functions; =head1 DESCRIPTION @@ -257,7 +257,7 @@ sub ha_debug { else { printf STDERR "%s\n", join ' ', @ARG; } - + return 0; } @@ -376,7 +376,7 @@ sub ocf_is_clone { } # returns true if the resource is configured as a multistate -# (master/slave) resource. This is defined as a resource where the +# (primary/secondary) resource. This is defined as a resource where the # master-max meta attribute is present, and set to greater than zero. sub ocf_is_ms { return ( defined $ENV{'OCF_RESKEY_CRM_meta_master_max'} @@ -394,7 +394,7 @@ sub ocf_is_ver { sub ocf_ver2num { my $v = 0; - + $v = $v * 1000 + $1 while $ARG[0] =~ /(\d+)/g; return $v; @@ -431,7 +431,7 @@ sub ocf_version_cmp { my $v1_level; my $v2_level; my $level_diff; - + return 3 unless ocf_is_ver( $v1 ); return 3 unless ocf_is_ver( $v2 ); From 5b94c491f9326e415ccee21af94580200e3d8e26 Mon Sep 17 00:00:00 2001 From: Stefanie Janine Stoelting Date: Tue, 12 Jan 2021 14:24:37 +0100 Subject: [PATCH 10/11] Switched to primary/secondary in comments and log messages. --- script/pgsqlms | 142 ++++++++++++++++++++++++------------------------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/script/pgsqlms b/script/pgsqlms index 7dd695e..ac38b98 100755 --- a/script/pgsqlms +++ b/script/pgsqlms @@ -207,7 +207,7 @@ sub _get_last_received_lsn { return undef; } -# Get the master score for each connected standby +# Get the primary score for each connected standby # Returns directly the result set of the query or exit with an error. 
# Exits with OCF_ERR_GENERIC if the query failed sub _get_lag_scores { @@ -349,7 +349,7 @@ sub _delete_priv_attr { return; } -# Get, parse and return the resource master score on given node. +# Get, parse and return the resource primary score on given node. # Returns an empty string if not found. # Returns undef on crm_master call on error sub _get_master_score { @@ -417,7 +417,7 @@ sub _is_master_recover { ); } -# Check if the current transition is a recover of a slave clone on given node. +# Check if the current transition is a recover of a secondary clone on given node. sub _is_slave_recover { my ( $n ) = @_; @@ -706,19 +706,19 @@ sub _controldata_to_ocf { return $OCF_ERR_INSTALLED ; } -# Check the write_location of all secondaries, and adapt their master score so -# that the instance closest to the master will be the selected candidate should +# Check the write_location of all secondaries, and adapt their primary score so +# that the instance closest to the primary will be the selected candidate should # a promotion be triggered. # NOTE: This is only a hint to pacemaker! The selected candidate to promotion # actually re-check it is the best candidate and force a re-election by failing # if a better one exists. This avoid a race condition between the call of the -# monitor action and the promotion where another slave might have catchup faster -# with the master. +# monitor action and the promotion where another secondary might have catchup faster +# with the primary. # NOTE: we cannot directly use the write_location, neither a lsn_diff value as # promotion score as Pacemaker considers any value greater than 1,000,000 as # INFINITY. # -# This sub must be executed from a master monitor action. +# This sub must be executed from a primary monitor action. 
# sub _check_locations { my $partition_nodes; @@ -743,7 +743,7 @@ sub _check_locations { ocf_log( 'warning', 'No secondary connected to the master' ) if $row_num == 0; - # For each standby connected, set their master score based on the following + # For each standby connected, set their primary score based on the following # rule: the first known node/application, with the highest priority and # an acceptable state. while ( $row = shift @rs ) { @@ -820,9 +820,9 @@ sub _check_locations { # _check_switchover # check if the pgsql switchover to the localnode is safe. -# This is supposed to be called **after** the master has been stopped or demoted. +# This is supposed to be called **after** the primary has been stopped or demoted. # This sub checks if the local standby received the shutdown checkpoint from the -# old master to make sure it can take over the master role and the old master +# old primary to make sure it can take over the primary role and the old primary # will be able to catchup as a standby after. # # Returns 0 if switchover is safe @@ -843,20 +843,20 @@ sub _check_switchover { .' Need to check the last record in WAL', $OCF_NOTIFY_ENV{'demote'}[0]{'uname'}, $nodename ); - # check if we received the shutdown checkpoint of the master during its + # check if we received the shutdown checkpoint of the primary during its # demote process. # We need the last local checkpoint LSN and the last received LSN from - # master to check in the WAL between these adresses if we have a + # primary to check in the WAL between these adresses if we have a # "checkpoint shutdown" using pg_xlogdump/pg_waldump. 
# # Force a checkpoint to make sure the controldata shows the very last TL - # and the master's shutdown checkpoint + # and the primary's shutdown checkpoint _query( q{ CHECKPOINT }, {} ); %cdata = _get_controldata(); $tl = $cdata{'tl'}; $last_redo = $cdata{'redo'}; - # Get the last received LSN from master + # Get the last received LSN from primary $last_lsn = _get_last_received_lsn(); unless ( defined $last_lsn ) { @@ -877,12 +877,12 @@ sub _check_switchover { if ( $rc == 0 and $ans =~ m{^rmgr: XLOG.*desc: (?i:checkpoint)(?::|_SHUTDOWN) redo [0-9A-F/]+; tli $tl;.*; shutdown$}m ) { - ocf_log( 'info', 'Slave received the shutdown checkpoint' ); + ocf_log( 'info', 'Secondary received the shutdown checkpoint' ); return 0; } ocf_exit_reason( - 'Did not receive the shutdown checkpoint from the old master!' ); + 'Did not receive the shutdown checkpoint from the old primary!' ); return 1; } @@ -909,7 +909,7 @@ sub _confirm_role { elsif ( $is_in_recovery eq 'f' ) { # The instance is a primary. ocf_log( 'debug', "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a primary"); - # Check lsn diff with current slaves if any + # Check lsn diff with current secondaries if any _check_locations() if $__OCF_ACTION eq 'monitor'; return $OCF_RUNNING_MASTER; } @@ -947,7 +947,7 @@ sub _confirm_stopped { my $pgctlstatus_rc; my $controldata_rc; - # Check the postmaster process status. + # Check the postmaster process status. $pgctlstatus_rc = _pg_ctl_status(); if ( $pgctlstatus_rc == 0 ) { @@ -1061,9 +1061,9 @@ parameter is set or a template file is found. =item B -Maximum lag allowed on a standby before we set a negative master score on it. +Maximum lag allowed on a standby before we set a negative primary score on it. The calculation is based on the difference between the current xlog location on -the master and the write location on the standby. +the primary and the write location on the standby. 
(optional, integer, default "0" disables this feature) @@ -1155,8 +1155,8 @@ sub ocf_meta_data { - Maximum lag allowed on a standby before we set a negative master score on it. The calculation - is based on the difference between the current LSN on the master and the LSN + Maximum lag allowed on a standby before we set a negative primary score on it. The calculation + is based on the difference between the current LSN on the primary and the LSN written on the standby. This parameter must be a valid positive number as described in PostgreSQL documentation. See: https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC @@ -1168,7 +1168,7 @@ sub ocf_meta_data { Path to the recovery.conf template. This file is simply copied to \$PGDATA - before starting the instance as slave. + before starting the instance as secondary. ONLY for PostgreSQL 11 and bellow. This parameter is IGNORED for PostgreSQL 12 and higher. The cluster will refuse to start if a template file is found. @@ -1229,11 +1229,11 @@ Suggested minimum timeout: 20. =item B -Promotes the resource to the Master role. Suggested minimum timeout: 30. +Promotes the resource to the primary role. Suggested minimum timeout: 30. =item B -Demotes the resource to the Slave role. Suggested minimum timeout: 120. +Demotes the resource to the secondary role. Suggested minimum timeout: 120. =item B @@ -1308,7 +1308,7 @@ sub pgsql_validate_all { chomp $ans; unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) { ocf_exit_reason( - 'You must set meta parameter notify=true for your master resource' + 'You must set meta parameter notify=true for your primary resource' ); return $OCF_ERR_INSTALLED; } @@ -1478,16 +1478,16 @@ sub pgsql_start { if ( $rc == $OCF_SUCCESS ) { ocf_log( 'info', 'Instance "%s" started', $OCF_RESOURCE_INSTANCE ); - # Check if a master score exists in the cluster. 
- # During the very first start of the cluster, no master score will - # exists on any of the existing slaves, unless an admin designated - # one of them using crm_master. If no master exists the cluster will - # not promote a master among the slaves. - # To solve this situation, we check if there is at least one master + # Check if a primary score exists in the cluster. + # During the very first start of the cluster, no primary score will + # exists on any of the existing secondaries, unless an admin designated + # one of them using crm_master. If no primary exists the cluster will + # not promote a primary among the secondaries. + # To solve this situation, we check if there is at least one primary # score existing on one node in the cluster. Do nothing if at least - # one master score is found among the clones of the resource. If no - # master score exists, set a score of 1 only if the resource was a - # shut downed master before the start. + # one primary score is found among the clones of the resource. If no + # primary score exists, set a score of 1 only if the resource was a + # shut downed primary before the start. if ( $prev_state eq "shut down" and not _master_score_exists() ) { ocf_log( 'info', 'No master score around. Set mine to 1' ); @@ -1498,7 +1498,7 @@ sub pgsql_start { } ocf_exit_reason( - 'Instance "%s" is not running as a slave (returned %d)', + 'Instance "%s" is not running as a secondary (returned %d)', $OCF_RESOURCE_INSTANCE, $rc ); return $OCF_ERR_GENERIC; @@ -1719,9 +1719,9 @@ sub pgsql_demote { return $OCF_ERR_GENERIC; } - # TODO we need to make sure at least one slave is connected!! + # TODO we need to make sure at least one secondary is connected!! 
- # WARNING if the resource state is stopped instead of master, the ocf ra dev + # WARNING if the resource state is stopped instead of primary, the ocf ra dev # rsc advises to return OCF_ERR_GENERIC, misleading the CRM in a loop where # it computes transitions of demote(failing)->stop->start->promote actions # until failcount == migration-threshold. @@ -1792,12 +1792,12 @@ sub pgsql_promote { $rc = pgsql_monitor(); if ( $rc == $OCF_SUCCESS ) { - # Running as slave. Normal, expected behavior. + # Running as secondary. Normal, expected behavior. ocf_log( 'debug', 'pgsql_promote: "%s" currently running as a standby', $OCF_RESOURCE_INSTANCE ); } elsif ( $rc == $OCF_RUNNING_MASTER ) { - # Already a master. Unexpected, but not a problem. + # Already a primary. Unexpected, but not a problem. ocf_log( 'info', '"%s" already running as a primary', $OCF_RESOURCE_INSTANCE ); return $OCF_SUCCESS; @@ -1837,24 +1837,24 @@ sub pgsql_promote { # internal error during _check_switchover } - # Do not check for a better candidate if we try to recover the master - # Recover of a master is detected during the pre-promote action. It sets the - # private attribute 'recover_master' to '1' if this is a master recover. + # Do not check for a better candidate if we try to recover the primary + # Recover of a primary is detected during the pre-promote action. It sets the + # private attribute 'recover_master' to '1' if this is a primary recover. if ( _get_priv_attr( 'recover_master' ) eq '1' ) { ocf_log( 'info', 'Recovering old master, no election needed'); } else { # The promotion is occurring on the best known candidate (highest - # master score), as chosen by pacemaker during the last working monitor - # on previous master (see pgsql_monitor/_check_locations subs). + # primary score), as chosen by pacemaker during the last working monitor + # on previous primary (see pgsql_monitor/_check_locations subs). 
# To avoid any race condition between the last monitor action on the - # previous master and the **real** most up-to-date standby, we + # previous primary and the **real** most up-to-date standby, we # set each standby location during the "pre-promote" action, and stored # them using the "lsn_location" resource attribute. # # The best standby to promote would have the highest known LSN. If the - # current resource is not the best one, we need to modify the master + # current resource is not the best one, we need to modify the primary # scores accordingly, and abort the current promotion. ocf_log( 'debug', 'pgsql_promote: checking if current node is the best candidate for promotion' ); @@ -1931,17 +1931,17 @@ sub pgsql_promote { } } - # If any node has been selected, we adapt the master scores accordingly + # If any node has been selected, we adapt the primary scores accordingly # and break the current promotion. if ( $node_to_promote ne '' ) { ocf_exit_reason( '%s is the best candidate to promote, aborting current promotion', $node_to_promote ); - # Reset current node master score. + # Reset current node primary score. _set_master_score( '1' ); - # Set promotion candidate master score. + # Set promotion candidate primary score. _set_master_score( '1000', $node_to_promote ); # We fail the promotion to trigger another promotion transition @@ -1972,8 +1972,8 @@ sub pgsql_promote { return $OCF_SUCCESS; } -# This action is called **before** the actual promotion when a failing master is -# considered unreclaimable, recoverable or a new master must be promoted +# This action is called **before** the actual promotion when a failing primary +# is considered unreclaimable, recoverable or a new primary must be promoted # (switchover or first start). # As every "notify" action, it is executed almost simultaneously on all # available nodes. 
@@ -1988,9 +1988,9 @@ sub pgsql_notify_pre_promote { ocf_log( 'info', 'Promoting instance on node "%s"', $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ); - # No need to do an election between slaves if this is recovery of the master + # No need to do an election between secondarys if this is recovery of the primary if ( _is_master_recover( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ) ) { - ocf_log( 'warning', 'This is a master recovery!' ); + ocf_log( 'warning', 'This is a primary recovery!' ); _set_priv_attr( 'recover_master', '1' ) if $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename; @@ -2000,12 +2000,12 @@ sub pgsql_notify_pre_promote { # Environment cleanup! _delete_priv_attr( 'lsn_location' ); - _delete_priv_attr( 'recover_master' ); + _delete_priv_attr( 'recover_primary' ); _delete_priv_attr( 'nodes' ); _delete_priv_attr( 'cancel_switchover' ); - # check for the last received entry of WAL from the master if we are - # the designated slave to promote + # check for the last received entry of WAL from the primary if we are + # the designated secondary to promote if ( _is_switchover( $nodename ) and scalar grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'promote'} } ) { @@ -2021,7 +2021,7 @@ sub pgsql_notify_pre_promote { # If the sub keeps going, that means the switchover is safe. # Keep going with the election process in case the switchover was # instruct to the wrong node. - # FIXME: should we allow a switchover to a lagging slave? + # FIXME: should we allow a switchover to a lagging secondary? } # We need to trigger an election between existing slaves to promote the best @@ -2031,7 +2031,7 @@ sub pgsql_notify_pre_promote { # During the following promote action, The designated standby for # promotion use these attributes to check if the instance to be promoted # is the best one, so we can avoid a race condition between the last - # successful monitor on the previous master and the current promotion. 
+ # successful monitor on the previous primary and the current promotion. # As we can not break the transition from a notification action, we check # during the promotion if each node TL and LSN are valid. @@ -2058,12 +2058,12 @@ sub pgsql_notify_pre_promote { ocf_log( 'warning', 'Could not set the current node LSN' ) if $? != 0 ; - # If this node is the future master, keep track of the slaves that + # If this node is the future primary, keep track of the secondaries that # received the same notification to compare our LSN with them during # promotion if ( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename ) { # Build the list of active nodes: - # master + slave + start - stop + # primary + secondary + start - stop # FIXME: Deal with rsc started during the same transaction but **after** # the promotion ? $active_nodes{ $_->{'uname'} }++ foreach @{ $OCF_NOTIFY_ENV{'active'} }, @@ -2082,7 +2082,7 @@ sub pgsql_notify_pre_promote { # This action is called after a promote action. sub pgsql_notify_post_promote { - # We have a new master (or the previous one recovered). + # We have a new primary (or the previous one recovered). # Environment cleanup! _delete_priv_attr( 'lsn_location' ); _delete_priv_attr( 'recover_master' ); @@ -2103,12 +2103,12 @@ sub pgsql_notify_pre_demote { $rc = pgsql_monitor(); - # do nothing if this is not a master recovery + # do nothing if this is not a primary recovery return $OCF_SUCCESS unless _is_master_recover( $nodename ) and $rc == $OCF_FAILED_MASTER; - # in case of master crash, we need to detect if the CRM tries to recover - # the master clone. The usual transition is to do: + # in case of primary crash, we need to detect if the CRM tries to recover + # the primary clone. The usual transition is to do: # demote->stop->start->promote # # There are multiple flaws with this transition: @@ -2121,7 +2121,7 @@ sub pgsql_notify_pre_demote { # If it success, at least it will be demoted correctly with a normal # status. 
If it fails, it will be catched up in next steps. - ocf_log( 'info', 'Trying to start failing master "%s"...', + ocf_log( 'info', 'Trying to start failing primary "%s"...', $OCF_RESOURCE_INSTANCE ); # Either the instance managed to start or it couldn't. @@ -2147,12 +2147,12 @@ sub pgsql_notify_pre_stop { $rc = _controldata_to_ocf(); - # do nothing if this is not a slave recovery + # do nothing if this is not a secondary recovery return $OCF_SUCCESS unless _is_slave_recover( $nodename ) and $rc == $OCF_RUNNING_SLAVE; - # in case of slave crash, we need to detect if the CRM tries to recover - # the slaveclone. The usual transition is to do: stop->start + # in case of secondary crash, we need to detect if the CRM tries to recover + # the secondary clone. The usual transition is to do: stop->start # # This transition can no twork because the instance is in # OCF_ERR_GENERIC step. So the stop action will fail, leading most @@ -2162,7 +2162,7 @@ sub pgsql_notify_pre_stop { # If it success, at least it will be stopped correctly with a normal # status. If it fails, it will be catched up in next steps. - ocf_log( 'info', 'Trying to start failing slave "%s"...', + ocf_log( 'info', 'Trying to start failing secondary "%s"...', $OCF_RESOURCE_INSTANCE ); # Either the instance managed to start or it couldn't. From c0019b0411dccda3a34171417d87d377f8b3bea9 Mon Sep 17 00:00:00 2001 From: Jehan-Guillaume de Rorthais <16266+ioguix@users.noreply.github.com> Date: Thu, 28 Jan 2021 19:31:23 +0100 Subject: [PATCH 11/11] Switched to primary/secondary in comments and log messages. --- README.md | 10 +++---- debian/changelog | 33 +++------------------ lib/OCF_Functions.pm | 6 ++-- script/pgsqlms | 69 ++++++++++++++++++++++---------------------- 4 files changed, 47 insertions(+), 71 deletions(-) diff --git a/README.md b/README.md index e81fb87..20b224a 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ yet powerful. 
Once your PostgreSQL cluster built using internal streaming replication, PAF is able to expose to Pacemaker what is the current status of the PostgreSQL -instance on each node: primary, secondary, stopped, catching up, etc. Should a +instance on each node: primary, secondary, stopped, etc. Should a failure occurs on the primary, Pacemaker will try to recover it by default. Should the failure be non-recoverable, PAF allows the secondaries to be able to elect the best of them (the closest one to the old primary) and promote it as @@ -76,10 +76,10 @@ can set: `-c config_file=/etc/postgresql/9.3/main/postgresql.conf` * `system_user`: the system owner of your instance's process (default: `postgres`) - * `maxlag`: maximum lag allowed on a standby before we set a negative primary - score on it. The calculation is based on the difference between the current - xlog location on the primary and the write location on the standby. - (default: 0, which disables this feature) + * `maxlag`: maximum lag allowed on a standby before we set a negative + promotion score on it. The calculation is based on the difference between + the current xlog location on the primary and the write location on the + standby. (default: 0, which disables this feature) For a demonstration about how to setup a cluster, see [http://clusterlabs.github.io/PAF/documentation.html](http://clusterlabs.github.io/PAF/documentation.html). 
diff --git a/debian/changelog b/debian/changelog index 8a6b0e0..44f3446 100644 --- a/debian/changelog +++ b/debian/changelog @@ -108,53 +108,28 @@ resource-agents-paf (2.0.0-1) unstable; urgency=low resource-agents-paf (2.0~rc1-1) unstable; urgency=low * 2.0_rc1 first release candidate + * see CHANGELOG.md -- Jehan-Guillaume (ioguix) de Rorthais Wed, 3 Aug 2016 18:17:35 +0200 resource-agents-paf (2.0~beta2-1) unstable; urgency=low * 2.0_beta2 beta release - * fix: bug in switchover with 9.5 and 9.6 + * see CHANGELOG.md -- Jehan-Guillaume (ioguix) de Rorthais Fri, 1 Jul 2016 16:35:35 +0200 resource-agents-paf (2.0~beta1-1) unstable; urgency=low * 2.0_beta1 beta release - * fix: do not use crm_node --partition to discover resources - * fix: unknown argument --query when calling crm_master - * fix: perl warning when master score has never been set on the primary - * fix: remove wrong info message during post-promote notify - * fix: race condition when setting attributes during actions - * fix: bug where pgport and pghost where ignored in _query - * fix: use same role name than the system_user to connect - * fix: wrap crm_master calls in sub to make them synchronous - * fix: fixed a bug related to setgid in _runas - * fix: check on application_name in validate_all - * change: do not start standby with a master score of 1 - * change: choose the clone to promote when no master score exist - * new: detect and deal primary/secondary recovery transition - * new: detect and enforce reliability of a switchover - * new: set next best secondaries base on their lag - * misc: code cleanup and refactoring - * misc: various log messages cleanup and enhancement + * see CHANGELOG.md -- Jehan-Guillaume (ioguix) de Rorthais Wed, 15 Jun 2016 23:19:58 +0200 resource-agents-paf (1.0.1-1) unstable; urgency=low * 1.0.1 minor release - * fix: forbid the primary to decrease its own score (gh #19) - * fix: bad LSN decimal converstion (gh #20) - * fix: support PostgreSQL 9.5 controldata output 
(gh #12) - * fix: set group id of given system_user before executing commands (gh #11) - * fix: use long argument of external commands when possible - * fix: bad header leading to wrong manpage section - * fix: OCF tests when PostgreSQL does not listen in /tmp - * change: do not update score outside of a monitor action (gh #18) - * new: add parameter 'start_opts', usefull for debian and derivated (gh #11) - * new: add specific timeout for primary and secondary roles in meta-data (gh #14) - * new: add debian packaging related files + * see CHANGELOG.md -- Jehan-Guillaume (ioguix) de Rorthais Wed, 27 Apr 2016 13:22:50 +0200 diff --git a/lib/OCF_Functions.pm b/lib/OCF_Functions.pm index 723b8e4..18fb5ff 100755 --- a/lib/OCF_Functions.pm +++ b/lib/OCF_Functions.pm @@ -375,9 +375,9 @@ sub ocf_is_clone { and $ENV{'OCF_RESKEY_CRM_meta_clone_max'} > 0 ); } -# returns true if the resource is configured as a multistate -# (primary/secondary) resource. This is defined as a resource where the -# master-max meta attribute is present, and set to greater than zero. +# returns true if the resource is promotable. +# This is defined as a resource where the master-max meta attribute is present, +# and set to greater than zero. sub ocf_is_ms { return ( defined $ENV{'OCF_RESKEY_CRM_meta_master_max'} and $ENV{'OCF_RESKEY_CRM_meta_master_max'} > 0 ); diff --git a/script/pgsqlms b/script/pgsqlms index ac38b98..b9311b1 100755 --- a/script/pgsqlms +++ b/script/pgsqlms @@ -207,7 +207,7 @@ sub _get_last_received_lsn { return undef; } -# Get the primary score for each connected standby +# Get the promotion score for each connected standby # Returns directly the result set of the query or exit with an error. # Exits with OCF_ERR_GENERIC if the query failed sub _get_lag_scores { @@ -349,7 +349,7 @@ sub _delete_priv_attr { return; } -# Get, parse and return the resource primary score on given node. +# Get, parse and return the promotion score on given node. # Returns an empty string if not found. 
# Returns undef on crm_master call on error sub _get_master_score { @@ -417,7 +417,7 @@ sub _is_master_recover { ); } -# Check if the current transition is a recover of a secondary clone on given node. +# Check if the current transition is a recover of a standby on given node. sub _is_slave_recover { my ( $n ) = @_; @@ -706,19 +706,19 @@ sub _controldata_to_ocf { return $OCF_ERR_INSTALLED ; } -# Check the write_location of all secondaries, and adapt their primary score so +# Check the write_location of all secondaries, and adapt their promotion score so # that the instance closest to the primary will be the selected candidate should # a promotion be triggered. # NOTE: This is only a hint to pacemaker! The selected candidate to promotion # actually re-check it is the best candidate and force a re-election by failing # if a better one exists. This avoid a race condition between the call of the -# monitor action and the promotion where another secondary might have catchup faster -# with the primary. +# monitor action and the promotion where another secondary might have caught up +# faster with the primary. # NOTE: we cannot directly use the write_location, neither a lsn_diff value as # promotion score as Pacemaker considers any value greater than 1,000,000 as # INFINITY. # -# This sub must be executed from a primary monitor action. +# This sub must be executed from a monitor action on a primary. # sub _check_locations { my $partition_nodes; @@ -743,9 +743,9 @@ sub _check_locations { ocf_log( 'warning', 'No secondary connected to the master' ) if $row_num == 0; - # For each standby connected, set their primary score based on the following - # rule: the first known node/application, with the highest priority and - # an acceptable state. + # For each standby connected, set their promotion score based on the + # following rule: the first known node/application, with the highest + # priority and an acceptable state.
while ( $row = shift @rs ) { if ( $partition_nodes !~ /$row->[0]/ ) { @@ -846,7 +846,7 @@ sub _check_switchover { # check if we received the shutdown checkpoint of the primary during its # demote process. # We need the last local checkpoint LSN and the last received LSN from - # primary to check in the WAL between these adresses if we have a + # the primary to check in the WAL between these addresses if we have a # "checkpoint shutdown" using pg_xlogdump/pg_waldump. # # Force a checkpoint to make sure the controldata shows the very last TL @@ -947,7 +947,7 @@ sub _confirm_stopped { my $pgctlstatus_rc; my $controldata_rc; - # Check the postprimary process status. + # Check the main postgresql process status. $pgctlstatus_rc = _pg_ctl_status(); if ( $pgctlstatus_rc == 0 ) { @@ -1061,7 +1061,7 @@ parameter is set or a template file is found. =item B -Maximum lag allowed on a standby before we set a negative primary score on it. +Maximum lag allowed on a standby before we set a negative promotion score on it. The calculation is based on the difference between the current xlog location on the primary and the write location on the standby. @@ -1155,7 +1155,7 @@ sub ocf_meta_data { - Maximum lag allowed on a standby before we set a negative primary score on it. The calculation + Maximum lag allowed on a standby before we set a negative promotion score on it. The calculation is based on the difference between the current LSN on the primary and the LSN written on the standby. This parameter must be a valid positive number as described in PostgreSQL documentation. @@ -1229,11 +1229,11 @@ Suggested minimum timeout: 20. =item B -Promotes the resource to the primary role. Suggested minimum timeout: 30. +Promotes the standby to production. Suggested minimum timeout: 30. =item B -Demotes the resource to the secondary role. Suggested minimum timeout: 120. +Demotes the primary instance as a standby. Suggested minimum timeout: 120.
=item B @@ -1478,18 +1478,19 @@ sub pgsql_start { if ( $rc == $OCF_SUCCESS ) { ocf_log( 'info', 'Instance "%s" started', $OCF_RESOURCE_INSTANCE ); - # Check if a primary score exists in the cluster. + # Check if a promotion score exists in the cluster. # During the very first start of the cluster, no primary score will - # exists on any of the existing secondaries, unless an admin designated - # one of them using crm_primary. If no primary exists the cluster will - # not promote a primary among the secondaries. - # To solve this situation, we check if there is at least one primary - # score existing on one node in the cluster. Do nothing if at least - # one primary score is found among the clones of the resource. If no - # primary score exists, set a score of 1 only if the resource was a - # shut downed primary before the start. + # exist on any of the existing secondaries, unless an admin + # designated one of them using the "crm_master" command. If no + # primary exists the cluster will not promote a primary among the + # secondaries. + # To solve this situation, we check if there is at least one + # promotion score existing on one node in the cluster. Do nothing if + # at least one promotion score is found among the clones of the + # resource. If none exists, set a score of 1 only if the + # resource was a shut down primary before the start. if ( $prev_state eq "shut down" and not _master_score_exists() ) { - ocf_log( 'info', 'No master score around. Set mine to 1' ); + ocf_log( 'info', 'No promotion score around. Set mine to 1' ); _set_master_score( '1' ); } @@ -1846,7 +1847,7 @@ sub pgsql_promote { else { # The promotion is occurring on the best known candidate (highest - # primary score), as chosen by pacemaker during the last working monitor + # promotion score), as chosen by pacemaker during the last working monitor # on previous primary (see pgsql_monitor/_check_locations subs).
# To avoid any race condition between the last monitor action on the # previous primary and the **real** most up-to-date standby, we @@ -1854,7 +1855,7 @@ sub pgsql_promote { # them using the "lsn_location" resource attribute. # # The best standby to promote would have the highest known LSN. If the - # current resource is not the best one, we need to modify the primary + # current resource is not the best one, we need to modify the promotion # scores accordingly, and abort the current promotion. ocf_log( 'debug', 'pgsql_promote: checking if current node is the best candidate for promotion' ); @@ -1931,17 +1932,17 @@ sub pgsql_promote { } } - # If any node has been selected, we adapt the primary scores accordingly - # and break the current promotion. + # If any node has been selected, we adapt the promotion scores + # accordingly and break the current promotion. if ( $node_to_promote ne '' ) { ocf_exit_reason( '%s is the best candidate to promote, aborting current promotion', $node_to_promote ); - # Reset current node primary score. + # Reset current node promotion score. _set_master_score( '1' ); - # Set promotion candidate primary score. + # Set candidate promotion score. _set_master_score( '1000', $node_to_promote ); # We fail the promotion to trigger another promotion transition @@ -1988,7 +1989,7 @@ sub pgsql_notify_pre_promote { ocf_log( 'info', 'Promoting instance on node "%s"', $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ); - # No need to do an election between secondarys if this is recovery of the primary + # No need to do an election if this is recovery of the primary if ( _is_master_recover( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ) ) { ocf_log( 'warning', 'This is a primary recovery!' ); @@ -2000,7 +2001,7 @@ sub pgsql_notify_pre_promote { # Environment cleanup! _delete_priv_attr( 'lsn_location' ); - _delete_priv_attr( 'recover_primary' ); + _delete_priv_attr( 'recover_master' ); _delete_priv_attr( 'nodes' ); _delete_priv_attr( 'cancel_switchover' );