From 9e68cb64f6660fe9e40c4ef75e1a891aa0804dbb Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 24 Oct 2024 11:18:18 -0500 Subject: [PATCH 01/16] Refactor: pacemaker-attrd: always add remoteness to attribute value XML ... even if false, for code consistency and simplicity --- daemons/attrd/attrd_attributes.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c index b3eda6e2f92..74301d678a2 100644 --- a/daemons/attrd/attrd_attributes.c +++ b/daemons/attrd/attrd_attributes.c @@ -143,14 +143,13 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a, crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id); crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); pcmk__xe_add_node(xml, v->nodename, v->nodeid); - if (pcmk_is_set(v->flags, attrd_value_remote)) { - crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1); - } crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, pcmk__timeout_ms2s(a->timeout_ms)); crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, pcmk_is_set(a->flags, attrd_attr_is_private)); + crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, + pcmk_is_set(v->flags, attrd_value_remote)); crm_xml_add_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, force_write); return xml; From a1a2e20080688865b2f49e7f84b98e41e5b0381f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 24 Oct 2024 12:13:54 -0500 Subject: [PATCH 02/16] Refactor: pacemaker-attrd: don't use "uuid" to mean "XML ID" ... in write_attribute() --- daemons/attrd/attrd_cib.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index a40e7a1087c..b8f509ab7d7 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -543,21 +543,22 @@ write_attribute(attribute_t *a, bool ignore_delay) /* Iterate over each peer value of this attribute */ g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { - const char *uuid = NULL; + const char *node_xml_id = NULL; + // Try to get the XML ID used for the node in the CIB if (pcmk_is_set(v->flags, attrd_value_remote)) { - /* If this is a Pacemaker Remote node, the node's UUID is the same - * as its name, which we already have. - */ - uuid = v->nodename; + // A Pacemaker Remote node's XML ID is the same as its name + node_xml_id = v->nodename; } else { - // This will create a cluster node cache entry if none exists + /* Get cluster node XML IDs from the peer caches. + * This will create a cluster node cache entry if none exists. + */ pcmk__node_status_t *peer = pcmk__get_node(v->nodeid, v->nodename, NULL, pcmk__node_search_any); - uuid = peer->xml_id; + node_xml_id = peer->xml_id; // Remember peer's node ID if we're just now learning it if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) { @@ -574,27 +575,27 @@ write_attribute(attribute_t *a, bool ignore_delay) } // Defer write if this is a cluster node that's never been seen - if (uuid == NULL) { + if (node_xml_id == NULL) { attrd_set_attr_flags(a, attrd_attr_uuid_missing); - crm_notice("Cannot update %s[%s]='%s' now because node's UUID is " - "unknown (will retry if learned)", + crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID " + "is unknown (will retry if learned)", a->id, v->nodename, v->current); continue; } // Update this value as part of the CIB transaction we're building - rc = add_attr_update(a, v->current, uuid); + rc = add_attr_update(a, v->current, node_xml_id); if (rc != pcmk_rc_ok) { - crm_err("Failed to update %s[%s]='%s': %s " - QB_XS " node uuid=%s id=%" PRIu32, + crm_err("Couldn't add %s[%s]='%s' to CIB transaction: %s " + QB_XS " node XML ID %s", a->id, v->nodename, v->current, pcmk_rc_str(rc), - uuid, v->nodeid); + node_xml_id); continue; } - crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")", + crm_debug("Added %s[%s]=%s to CIB transaction (node XML ID %s)", a->id, v->nodename, pcmk__s(v->current, "(unset)"), - uuid, v->nodeid); + node_xml_id); cib_updates++; /* Preservation of the attribute to transmit alert */ From 704b42f153f060af814ae83e1193d383f14088c4 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 24 Oct 2024 12:29:33 -0500 Subject: [PATCH 03/16] Low: pacemaker-attrd: use API to get peer XML ID ... for cleaner separation, and to ensure it is set whenever possible. --- daemons/attrd/attrd_cib.c | 2 +- daemons/attrd/attrd_corosync.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index b8f509ab7d7..6129f54c757 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -558,7 +558,7 @@ write_attribute(attribute_t *a, bool ignore_delay) NULL, pcmk__node_search_any); - node_xml_id = peer->xml_id; + node_xml_id = pcmk__cluster_node_uuid(peer); // Remember peer's node ID if we're just now learning it if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) { diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index eeb2b9b1df6..72ebc1843be 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -215,8 +215,8 @@ record_peer_nodeid(attribute_value_t *v, const char *host) pcmk__node_status_t *known_peer = pcmk__get_node(v->nodeid, host, NULL, pcmk__node_search_cluster_member); - crm_trace("Learned %s has node id %s", - known_peer->name, known_peer->xml_id); + crm_trace("Learned %s has XML ID %s", + known_peer->name, pcmk__cluster_node_uuid(known_peer)); if (attrd_election_won()) { attrd_write_attributes(attrd_write_changed); } From 28faf9cdd3cd79b0290b9b457c5192421fc5c52f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 24 Oct 2024 14:46:36 -0500 Subject: [PATCH 04/16] Low: pacemaker-attrd: bail earlier if value won't be written We only need the node XML ID for writing values to the CIB, so if a value will never be written, skip looking for the XML ID. This does mean that cluster nodes won't be added to the peer cache for unwritten attributes, but that shouldn't matter for them. --- daemons/attrd/attrd_cib.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 6129f54c757..808a7bc7e32 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -545,6 +545,12 @@ write_attribute(attribute_t *a, bool ignore_delay) while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { const char *node_xml_id = NULL; + // Private attributes (or any in standalone mode) are not written to CIB + if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) { + private_updates++; + continue; + } + // Try to get the XML ID used for the node in the CIB if (pcmk_is_set(v->flags, attrd_value_remote)) { // A Pacemaker Remote node's XML ID is the same as its name @@ -568,12 +574,6 @@ write_attribute(attribute_t *a, bool ignore_delay) } } - /* If this is a private attribute, no update needs to be sent */ - if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) { - private_updates++; - continue; - } - // Defer write if this is a cluster node that's never been seen if (node_xml_id == NULL) { attrd_set_attr_flags(a, attrd_attr_uuid_missing); From ee01715f3ae7ff64da6f8aad0d3537faa84b013b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 28 Oct 2024 11:24:08 -0500 Subject: [PATCH 05/16] Refactor: libcrmcluster: allow searching by XML ID in pcmk__search_node_caches() --- daemons/attrd/attrd_ipc.c | 2 +- daemons/based/based_messages.c | 2 +- daemons/controld/controld_corosync.c | 2 +- daemons/controld/controld_fencing.c | 2 +- daemons/controld/controld_messages.c | 8 ++++--- daemons/fenced/fenced_commands.c | 2 +- daemons/fenced/fenced_history.c | 2 +- daemons/fenced/fenced_remote.c | 2 +- include/crm/cluster/internal.h | 1 + lib/cluster/cpg.c | 2 +- lib/cluster/membership.c | 35 ++++++++++++++++++---------- 11 files changed, 37 insertions(+), 23 deletions(-) diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index 4b26cdb3d77..5ab2763dbfd 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -155,7 +155,7 @@ attrd_client_peer_remove(pcmk__request_t *request) pcmk__node_status_t *node = NULL; char *host_alloc = NULL; - node = pcmk__search_node_caches(nodeid, NULL, + node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_cluster_member); if ((node != NULL) && (node->name != NULL)) { // Use cached name if available diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c index 25d31f49ac0..e8a85904f75 100644 --- a/daemons/based/based_messages.c +++ b/daemons/based/based_messages.c @@ -254,7 +254,7 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml // Notify originating peer so it can notify its local clients pcmk__node_status_t *origin = NULL; - origin = pcmk__search_node_caches(0, host, + origin = pcmk__search_node_caches(0, host, NULL, pcmk__node_search_cluster_member); crm_info("Rejecting upgrade request from %s: %s " diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c index 02b0e823ad7..61cf6293cc5 100644 --- a/daemons/controld/controld_corosync.c +++ b/daemons/controld/controld_corosync.c @@ -119,7 +119,7 @@ cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, if (controld_globals.dc_name != NULL) { pcmk__node_status_t *peer = NULL; - peer = pcmk__search_node_caches(0, controld_globals.dc_name, + peer = pcmk__search_node_caches(0, controld_globals.dc_name, NULL, pcmk__node_search_cluster_member); if (peer != NULL) { for (int i = 0; i < left_list_entries; ++i) { diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index e24523cbb03..093f48eb455 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -591,7 +591,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) |pcmk__node_search_cluster_cib; pcmk__node_status_t *peer = pcmk__search_node_caches(0, event->target, - flags); + NULL, flags); const char *uuid = NULL; if (peer == NULL) { diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c index 1f4b3891cea..65b1b829cec 100644 --- a/daemons/controld/controld_messages.c +++ b/daemons/controld/controld_messages.c @@ -501,7 +501,7 @@ relay_message(xmlNode * msg, gboolean originated_locally) } if (!broadcast) { - node_to = pcmk__search_node_caches(0, host_to, + node_to = pcmk__search_node_caches(0, host_to, NULL, pcmk__node_search_cluster_member); if (node_to == NULL) { crm_warn("Ignoring message %s because node %s is unknown", @@ -943,7 +943,8 @@ handle_node_info_request(const xmlNode *msg) value = controld_globals.cluster->priv->node_name; } - node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any); + node = pcmk__search_node_caches(node_id, value, NULL, + pcmk__node_search_any); if (node) { crm_xml_add(reply_data, PCMK_XA_ID, node->xml_id); crm_xml_add(reply_data, PCMK_XA_UNAME, node->name); @@ -1070,7 +1071,8 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { const char *from = crm_element_value(stored_msg, PCMK__XA_SRC); pcmk__node_status_t *node = - pcmk__search_node_caches(0, from, pcmk__node_search_cluster_member); + pcmk__search_node_caches(0, from, NULL, + pcmk__node_search_cluster_member); pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); if(AM_I_DC == FALSE) { diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index 082a4f5af35..9205ec727d9 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -2875,7 +2875,7 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result) pcmk__node_status_t *node = NULL; pcmk__scan_min_int(host, &nodeid, 0); - node = pcmk__search_node_caches(nodeid, NULL, + node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c index a5285209be8..d1e088a6173 100644 --- a/daemons/fenced/fenced_history.c +++ b/daemons/fenced/fenced_history.c @@ -487,7 +487,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, pcmk__node_status_t *node = NULL; pcmk__scan_min_int(target, &nodeid, 0); - node = pcmk__search_node_caches(nodeid, NULL, + node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); if (node != NULL) { diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c index 1e19c51dc32..0f92ed5f30d 100644 --- a/daemons/fenced/fenced_remote.c +++ b/daemons/fenced/fenced_remote.c @@ -1245,7 +1245,7 @@ create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) pcmk__node_status_t *node = NULL; pcmk__scan_min_int(op->target, &nodeid, 0); - node = pcmk__search_node_caches(nodeid, NULL, + node = pcmk__search_node_caches(nodeid, NULL, NULL, pcmk__node_search_any |pcmk__node_search_cluster_cib); diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index 11a82beee3a..0afca28950c 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -309,6 +309,7 @@ void pcmk__cluster_forget_cluster_node(uint32_t id, const char *node_name); void pcmk__cluster_forget_remote_node(const char *node_name); pcmk__node_status_t *pcmk__search_node_caches(unsigned int id, const char *uname, + const char *xml_id, uint32_t flags); void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id); diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c index 559dd408e0f..fa9892e9938 100644 --- a/lib/cluster/cpg.c +++ b/lib/cluster/cpg.c @@ -576,7 +576,7 @@ node_left(const char *cpg_group_name, int event_counter, size_t member_list_entries) { pcmk__node_status_t *peer = - pcmk__search_node_caches(cpg_peer->nodeid, NULL, + pcmk__search_node_caches(cpg_peer->nodeid, NULL, NULL, pcmk__node_search_cluster_member); const struct cpg_address **rival = NULL; diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index 0705b6570d8..bccbe12aa76 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -156,7 +156,7 @@ pcmk__cluster_lookup_remote_node(const char *node_name) * entry unless it has a node ID, which means the name actually is * associated with a cluster node. (@TODO return an error in that case?) */ - node = pcmk__search_node_caches(0, node_name, + node = pcmk__search_node_caches(0, node_name, NULL, pcmk__node_search_cluster_member); if ((node != NULL) && (node->xml_id == NULL)) { /* node_name could be a pointer into the cache entry being removed, so @@ -791,36 +791,47 @@ search_cluster_member_cache(unsigned int id, const char *uname, * \internal * \brief Search caches for a node (cluster or Pacemaker Remote) * - * \param[in] id If not 0, cluster node ID to search for - * \param[in] uname If not NULL, node name to search for - * \param[in] flags Group of enum pcmk__node_search_flags + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for + * \param[in] xml_id If not NULL, CIB XML ID of node to search for + * \param[in] flags Group of enum pcmk__node_search_flags * * \return Node cache entry if found, otherwise NULL */ pcmk__node_status_t * -pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) +pcmk__search_node_caches(unsigned int id, const char *uname, + const char *xml_id, uint32_t flags) { pcmk__node_status_t *node = NULL; - pcmk__assert((id > 0) || (uname != NULL)); + pcmk__assert((id > 0) || (uname != NULL) || (xml_id != NULL)); pcmk__cluster_init_node_caches(); - if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) { - node = g_hash_table_lookup(pcmk__remote_peer_cache, uname); + if (pcmk_is_set(flags, pcmk__node_search_remote)) { + if (uname != NULL) { + node = g_hash_table_lookup(pcmk__remote_peer_cache, uname); + } else if (xml_id != NULL) { + node = g_hash_table_lookup(pcmk__remote_peer_cache, xml_id); + } } if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_member)) { - node = search_cluster_member_cache(id, uname, NULL); + node = search_cluster_member_cache(id, uname, xml_id); } if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster_cib)) { - char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); + if (xml_id != NULL) { + node = find_cib_cluster_node(xml_id, uname); + } else { + // Assumes XML ID is node ID as string (as with Corosync) + char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); - node = find_cib_cluster_node(id_str, uname); - free(id_str); + node = find_cib_cluster_node(id_str, uname); + free(id_str); + } } return node; From 634ce35492459b03f26ecb217033fc033264287c Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 28 Oct 2024 12:26:12 -0500 Subject: [PATCH 06/16] Refactor: libcrmcluster: rename pcmk__cluster_node_uuid() ... to pcmk__cluster_get_xml_id(). It's getting the CIB XML ID; there's no such thing as a cluster-layer UUID, and it can be used with Pacemaker Remote nodes as well as cluster nodes. --- daemons/attrd/attrd_cib.c | 2 +- daemons/controld/controld_control.c | 2 +- daemons/controld/controld_fencing.c | 4 ++-- daemons/controld/controld_join_dc.c | 2 +- daemons/controld/controld_membership.c | 2 +- include/crm/cluster/internal.h | 2 +- lib/cluster/cluster.c | 9 ++++++--- lib/cluster/membership.c | 2 +- 8 files changed, 14 insertions(+), 11 deletions(-) diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 808a7bc7e32..ad2bf2052c1 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -564,7 +564,7 @@ write_attribute(attribute_t *a, bool ignore_delay) NULL, pcmk__node_search_any); - node_xml_id = pcmk__cluster_node_uuid(peer); + node_xml_id = pcmk__cluster_get_xml_id(peer); // Remember peer's node ID if we're just now learning it if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) { diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 4b00f894eff..ed4751b8d76 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -69,7 +69,7 @@ do_ha_control(long long action, free(controld_globals.our_uuid); controld_globals.our_uuid = - pcmk__str_copy(pcmk__cluster_node_uuid(node)); + pcmk__str_copy(pcmk__cluster_get_xml_id(node)); if (controld_globals.our_uuid == NULL) { crm_err("Could not obtain local uuid"); diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 093f48eb455..7565b6c6c40 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -384,7 +384,7 @@ execute_stonith_cleanup(void) char *target = iter->data; pcmk__node_status_t *target_node = pcmk__get_node(0, target, NULL, pcmk__node_search_cluster_member); - const char *uuid = pcmk__cluster_node_uuid(target_node); + const char *uuid = pcmk__cluster_get_xml_id(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); send_stonith_update(NULL, target, uuid); @@ -598,7 +598,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) return; } - uuid = pcmk__cluster_node_uuid(peer); + uuid = pcmk__cluster_get_xml_id(peer); if (AM_I_DC) { /* The DC always sends updates */ diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c index 7ada26949de..9960966c6ba 100644 --- a/daemons/controld/controld_join_dc.c +++ b/daemons/controld/controld_join_dc.c @@ -920,7 +920,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) */ crm_trace("Updating node name and UUID in CIB for %s", join_to); tmp1 = pcmk__xe_create(NULL, PCMK_XE_NODE); - crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_node_uuid(join_node)); + crm_xml_add(tmp1, PCMK_XA_ID, pcmk__cluster_get_xml_id(join_node)); crm_xml_add(tmp1, PCMK_XA_UNAME, join_to); fsa_cib_anon_update(PCMK_XE_NODES, tmp1); pcmk__xml_free(tmp1); diff --git a/daemons/controld/controld_membership.c b/daemons/controld/controld_membership.c index 8075955953d..daf0c5fd439 100644 --- a/daemons/controld/controld_membership.c +++ b/daemons/controld/controld_membership.c @@ -142,7 +142,7 @@ create_node_state_update(pcmk__node_status_t *node, int flags, } if (crm_xml_add(node_state, PCMK_XA_ID, - pcmk__cluster_node_uuid(node)) == NULL) { + pcmk__cluster_get_xml_id(node)) == NULL) { crm_info("Node update for %s cancelled: no ID", node->name); pcmk__xml_free(node_state); return NULL; diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index 0afca28950c..bc722cb3de5 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -260,7 +260,7 @@ char *pcmk__cpg_message_data(cpg_handle_t handle, uint32_t sender_id, # endif -const char *pcmk__cluster_node_uuid(pcmk__node_status_t *node); +const char *pcmk__cluster_get_xml_id(pcmk__node_status_t *node); char *pcmk__cluster_node_name(uint32_t nodeid); const char *pcmk__cluster_local_node_name(void); const char *pcmk__node_name_from_uuid(const char *uuid); diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index 3427a409f39..87abcfc43ec 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -34,14 +34,14 @@ CRM_TRACE_INIT_DATA(cluster); /*! * \internal - * \brief Get a node's cluster-layer UUID, setting it if not already set + * \brief Get a node's XML ID in the CIB, setting it if not already set * * \param[in,out] node Node to check * - * \return Cluster-layer node UUID of \p node, or \c NULL if unknown + * \return CIB XML ID of \p node if known, otherwise \c NULL */ const char * -pcmk__cluster_node_uuid(pcmk__node_status_t *node) +pcmk__cluster_get_xml_id(pcmk__node_status_t *node) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); @@ -52,6 +52,9 @@ pcmk__cluster_node_uuid(pcmk__node_status_t *node) return node->xml_id; } + // xml_id is always set when a Pacemaker Remote node entry is created + CRM_CHECK(!pcmk_is_set(node->flags, pcmk__node_status_remote), return NULL); + switch (cluster_layer) { #if SUPPORT_COROSYNC case pcmk_cluster_layer_corosync: diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index bccbe12aa76..ad55658d780 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -1000,7 +1000,7 @@ pcmk__get_node(unsigned int id, const char *uname, const char *xml_id, } if ((xml_id == NULL) && (node->xml_id == NULL)) { - xml_id = pcmk__cluster_node_uuid(node); + xml_id = pcmk__cluster_get_xml_id(node); if (xml_id == NULL) { crm_debug("Cannot obtain an XML ID for node %s[%u] at this time", node->name, id); From 8b4d6075c778f374f7289a910dad03f425e27728 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Mon, 28 Oct 2024 12:40:14 -0500 Subject: [PATCH 07/16] Low: libcrmcluster: use pcmk__cluster_get_xml_id() when possible ... rather than using node->xml_id directly, so it gets set whenever possible. Also, make comparisons case-sensitive. --- lib/cluster/cluster.c | 3 ++- lib/cluster/election.c | 7 ++++--- lib/cluster/membership.c | 14 +++++++++----- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index 87abcfc43ec..b560eaae526 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -337,7 +337,8 @@ pcmk__node_name_from_uuid(const char *uuid) g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { - if (pcmk__str_eq(node->xml_id, uuid, pcmk__str_casei)) { + if (pcmk__str_eq(uuid, pcmk__cluster_get_xml_id(node), + pcmk__str_none)) { return node->name; } } diff --git a/lib/cluster/election.c b/lib/cluster/election.c index 60a9156de9f..51d4630b18b 100644 --- a/lib/cluster/election.c +++ b/lib/cluster/election.c @@ -307,7 +307,8 @@ election_vote(pcmk_cluster_t *cluster) NULL, message_type, CRM_OP_VOTE, NULL); cluster->priv->election->count++; - crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, our_node->xml_id); + crm_xml_add(vote, PCMK__XA_ELECTION_OWNER, + pcmk__cluster_get_xml_id(our_node)); crm_xml_add_int(vote, PCMK__XA_ELECTION_ID, cluster->priv->election->count); // Warning: PCMK__XA_ELECTION_AGE_NANO_SEC value is actually microseconds @@ -546,8 +547,8 @@ election_count_vote(pcmk_cluster_t *cluster, const xmlNode *message, our_node = pcmk__get_node(0, cluster->priv->node_name, NULL, pcmk__node_search_cluster_member); we_are_owner = (our_node != NULL) - && pcmk__str_eq(our_node->xml_id, vote.election_owner, - pcmk__str_none); + && pcmk__str_eq(pcmk__cluster_get_xml_id(our_node), + vote.election_owner, pcmk__str_none); if (!can_win) { reason = "Not eligible"; diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index ad55658d780..e033f4e7545 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -153,7 +153,7 @@ pcmk__cluster_lookup_remote_node(const char *node_name) /* It's theoretically possible that the node was added to the cluster peer * cache before it was known to be a Pacemaker Remote node. Remove that - * entry unless it has a node ID, which means the name actually is + * entry unless it has an XML ID, which means the name actually is * associated with a cluster node. (@TODO return an error in that case?) */ node = pcmk__search_node_caches(0, node_name, NULL, @@ -713,8 +713,11 @@ search_cluster_member_cache(unsigned int id, const char *uname, } else if (uuid != NULL) { g_hash_table_iter_init(&iter, pcmk__peer_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { - if (pcmk__str_eq(node->xml_id, uuid, pcmk__str_casei)) { - crm_trace("UUID match: %s", node->xml_id); + const char *this_xml_id = pcmk__cluster_get_xml_id(node); + + if (pcmk__str_eq(uuid, this_xml_id, pcmk__str_none)) { + crm_trace("Found cluster node cache entry by XML ID %s", + this_xml_id); by_id = node; break; } @@ -1388,7 +1391,8 @@ find_cib_cluster_node(const char *id, const char *uname) if (id) { g_hash_table_iter_init(&iter, cluster_node_cib_cache); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) { - if (pcmk__str_eq(node->xml_id, id, pcmk__str_casei)) { + if (pcmk__str_eq(id, pcmk__cluster_get_xml_id(node), + pcmk__str_none)) { crm_trace("ID match: %s= %p", id, node); by_id = node; break; @@ -1424,7 +1428,7 @@ find_cib_cluster_node(const char *id, const char *uname) * Return by_id. */ } else if ((id != NULL) && (by_name->xml_id != NULL) - && pcmk__str_eq(id, by_name->xml_id, pcmk__str_casei)) { + && pcmk__str_eq(id, by_name->xml_id, pcmk__str_none)) { /* Multiple nodes have the same id in the CIB. * Return by_name. */ node = by_name; From e3376f56cdb35a3f87389b71111bdfe29a0ea31b Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 30 Oct 2024 10:43:51 -0500 Subject: [PATCH 08/16] Refactor: libcrmcluster: track local node XML ID in cluster object This effectively reverts 7afc16075 --- include/crm/cluster/internal.h | 1 + lib/cluster/cluster.c | 1 + lib/cluster/corosync.c | 9 +++++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h index bc722cb3de5..0d0ed59f2ac 100644 --- a/include/crm/cluster/internal.h +++ b/include/crm/cluster/internal.h @@ -91,6 +91,7 @@ typedef struct pcmk__election pcmk__election_t; struct pcmk__cluster_private { enum pcmk_ipc_server server; //!< Server this connection is for (if any) char *node_name; //!< Local node name at cluster layer + char *node_xml_id; //!< Local node XML ID in CIB pcmk__election_t *election; //!< Election state (if election is needed) /* @TODO Corosync uses an integer node ID, but cluster layers in the diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c index b560eaae526..dda4b8e89ae 100644 --- a/lib/cluster/cluster.c +++ b/lib/cluster/cluster.c @@ -166,6 +166,7 @@ pcmk_cluster_free(pcmk_cluster_t *cluster) return; } election_fini(cluster); + free(cluster->priv->node_xml_id); free(cluster->priv->node_name); free(cluster->priv); free(cluster); diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c index 32443a1e079..2782b100671 100644 --- a/lib/cluster/corosync.c +++ b/lib/cluster/corosync.c @@ -460,6 +460,7 @@ pcmk__corosync_connect(pcmk_cluster_t *cluster) { const enum pcmk_cluster_layer cluster_layer = pcmk_get_cluster_layer(); const char *cluster_layer_s = pcmk_cluster_layer_text(cluster_layer); + pcmk__node_status_t *local_node = NULL; int rc = pcmk_rc_ok; pcmk__cluster_init_node_caches(); @@ -490,8 +491,12 @@ pcmk__corosync_connect(pcmk_cluster_t *cluster) } // Ensure local node always exists in peer cache - pcmk__get_node(cluster->priv->node_id, cluster->priv->node_name, NULL, - pcmk__node_search_cluster_member); + local_node = pcmk__get_node(cluster->priv->node_id, + cluster->priv->node_name, NULL, + pcmk__node_search_cluster_member); + + cluster->priv->node_xml_id = pcmk__corosync_uuid(local_node); + CRM_LOG_ASSERT(cluster->priv->node_xml_id != NULL); return pcmk_rc_ok; } From 85d7a70916c5fd85d89ad34396be5df0ed151669 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 24 Oct 2024 11:30:00 -0500 Subject: [PATCH 09/16] Refactor: pacemaker-attrd: track node CIB ID rather than cluster ID Previously, attribute_value_t had a nodeid member to track the cluster ID of the node that the value is for. However the only reason we need it is to be able to get the node's XML ID in the CIB, for writing out the value. Rename it to node_xml_id and track the XML ID directly. In practice, there is no real change, since the CIB XML ID of Corosync nodes is simply their cluster ID as a string. This allows us to keep the same XML attribute and value in peer messages for backward compatibility. If we ever support node XML IDs that are *not* the string equivalent of their cluster IDs, rolling upgrades will be possible only from versions with this commit and later. --- daemons/attrd/attrd_alerts.c | 12 ++++++++- daemons/attrd/attrd_attributes.c | 11 +++++++- daemons/attrd/attrd_cib.c | 45 ++++++++++++++++++++------------ daemons/attrd/attrd_corosync.c | 42 +++++++++++++++-------------- daemons/attrd/attrd_ipc.c | 14 +++++----- daemons/attrd/attrd_messages.c | 6 +++-- daemons/attrd/attrd_utils.c | 1 + daemons/attrd/pacemaker-attrd.h | 4 +-- 8 files changed, 86 insertions(+), 49 deletions(-) diff --git a/daemons/attrd/attrd_alerts.c b/daemons/attrd/attrd_alerts.c index 55cb477c22e..81d02d2ce2c 100644 --- a/daemons/attrd/attrd_alerts.c +++ b/daemons/attrd/attrd_alerts.c @@ -124,12 +124,22 @@ attrd_read_options(gpointer user_data) } int -attrd_send_attribute_alert(const char *node, int nodeid, +attrd_send_attribute_alert(const char *node, const char *node_xml_id, const char *attr, const char *value) { + uint32_t nodeid = 0U; + pcmk__node_status_t *node_status = NULL; + if (attrd_alert_list == NULL) { return pcmk_ok; } + node_status = pcmk__search_node_caches(0, node, node_xml_id, + pcmk__node_search_remote + |pcmk__node_search_cluster_member + |pcmk__node_search_cluster_cib); + if (node_status != NULL) { + nodeid = node_status->cluster_layer_id; + } return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list, node, nodeid, attr, value); } diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c index 74301d678a2..6d80acfce1c 100644 --- a/daemons/attrd/attrd_attributes.c +++ b/daemons/attrd/attrd_attributes.c @@ -142,7 +142,16 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a, crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type); crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id); crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); - pcmk__xe_add_node(xml, v->nodename, v->nodeid); + crm_xml_add(xml, PCMK__XA_ATTR_HOST, v->nodename); + + /* @COMPAT Prior to 2.1.10 and 3.0.1, the node's cluster ID was added + * instead of its XML ID. For Corosync and Pacemaker Remote nodes, those are + * the same, but if we ever support node XML IDs that differ from their + * cluster IDs, we will have to drop support for rolling upgrades from + * versions before those. + */ + crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, v->node_xml_id); + crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, pcmk__timeout_ms2s(a->timeout_ms)); diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index ad2bf2052c1..665af3625d8 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -10,7 +10,6 @@ #include #include -#include // PRIu32 #include #include #include @@ -450,10 +449,12 @@ send_alert_attributes_value(attribute_t *a, GHashTable *t) g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { - rc = attrd_send_attribute_alert(at->nodename, at->nodeid, + rc = attrd_send_attribute_alert(at->nodename, at->node_xml_id, a->id, at->current); - crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d", - a->id, at->nodename, at->current, at->nodeid, rc); + crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s " + "(%s agents failed)", + a->id, at->nodename, at->current, at->node_xml_id, + ((rc == 0)? "no" : ((rc == -1)? "some" : "all"))); } } @@ -462,7 +463,7 @@ set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t)); - a_v->nodeid = v->nodeid; + a_v->node_xml_id = pcmk__str_copy(v->node_xml_id); a_v->nodename = pcmk__str_copy(v->nodename); a_v->current = pcmk__str_copy(v->current); @@ -551,26 +552,25 @@ write_attribute(attribute_t *a, bool ignore_delay) continue; } - // Try to get the XML ID used for the node in the CIB + /* We need the node's CIB XML ID to write out its attributes, so look + * for it now. Check the node caches first, even if the ID was + * previously known (in case it changed), but use any previous value as + * a fallback. + */ + if (pcmk_is_set(v->flags, attrd_value_remote)) { // A Pacemaker Remote node's XML ID is the same as its name node_xml_id = v->nodename; } else { - /* Get cluster node XML IDs from the peer caches. - * This will create a cluster node cache entry if none exists. - */ - pcmk__node_status_t *peer = pcmk__get_node(v->nodeid, v->nodename, - NULL, + // This creates a cluster node cache entry if none exists + pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename, + v->node_xml_id, pcmk__node_search_any); node_xml_id = pcmk__cluster_get_xml_id(peer); - - // Remember peer's node ID if we're just now learning it - if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) { - crm_trace("Learned ID %" PRIu32 " for node %s", - peer->cluster_layer_id, v->nodename); - v->nodeid = peer->cluster_layer_id; + if (node_xml_id == NULL) { + node_xml_id = v->node_xml_id; } } @@ -583,6 +583,17 @@ write_attribute(attribute_t *a, bool ignore_delay) continue; } + /* Remember the XML ID and let peers know it (in case one of them + * becomes the writer later) + */ + if (!pcmk__str_eq(v->node_xml_id, node_xml_id, pcmk__str_none)) { + crm_trace("Setting %s[%s] node XML ID to %s (was %s)", + a->id, v->nodename, node_xml_id, + pcmk__s(v->node_xml_id, "unknown")); + pcmk__str_update(&(v->node_xml_id), node_xml_id); + attrd_broadcast_value(a, v); + } + // Update this value as part of the CIB transaction we're building rc = add_attr_update(a, v->current, node_xml_id); if (rc != pcmk_rc_ok) { diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index 72ebc1843be..02816b94d20 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -209,19 +209,6 @@ attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer, } } -static void -record_peer_nodeid(attribute_value_t *v, const char *host) -{ - pcmk__node_status_t *known_peer = - pcmk__get_node(v->nodeid, host, NULL, pcmk__node_search_cluster_member); - - crm_trace("Learned %s has XML ID %s", - known_peer->name, pcmk__cluster_node_uuid(known_peer)); - if (attrd_election_won()) { - attrd_write_attributes(attrd_write_changed); - } -} - #define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)") #define readable_peer(p) \ @@ -235,6 +222,7 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, int is_remote = 0; bool changed = false; attribute_value_t *v = NULL; + const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID); // Create entry for value if not already existing v = g_hash_table_lookup(a->values, host); @@ -245,6 +233,13 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, g_hash_table_replace(a->values, v->nodename, v); } + /* If update doesn't contain the node XML ID, fall back to any previously + * known value (for logging) + */ + if (node_xml_id == NULL) { + node_xml_id = v->node_xml_id; + } + // If value is for a Pacemaker Remote node, remember that crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); if (is_remote) { @@ -270,11 +265,12 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, } else if (changed) { crm_notice("Setting %s[%s]%s%s: %s -> %s " - QB_XS " from %s with %s write delay", + QB_XS " from %s with %s write delay and node XML ID %s", attr, host, a->set_type ? " in " : "", pcmk__s(a->set_type, ""), readable_value(v), pcmk__s(value, "(unset)"), peer->name, - (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); + (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms), + pcmk__s(node_xml_id, "unknown")); pcmk__str_update(&v->current, value); attrd_set_attr_flags(a, attrd_attr_changed); @@ -319,11 +315,17 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, // This allows us to later detect local values that peer doesn't know about attrd_set_value_flags(v, attrd_value_from_peer); - /* If this is a cluster node whose node ID we are learning, remember it */ - if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote) - && (crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID, - (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { - record_peer_nodeid(v, host); + // Remember node's XML ID if we're just learning it + if ((node_xml_id != NULL) + && !pcmk__str_eq(node_xml_id, v->node_xml_id, pcmk__str_none)) { + crm_trace("Learned %s[%s] node XML ID is %s (was %s)", + a->id, v->nodename, node_xml_id, + pcmk__s(v->node_xml_id, "unknown")); + pcmk__str_update(&(v->node_xml_id), node_xml_id); + if (attrd_election_won()) { + // In case we couldn't write a value missing the XML ID before + attrd_write_attributes(attrd_write_changed); + } } } diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c index 5ab2763dbfd..fd917a37bb5 100644 --- a/daemons/attrd/attrd_ipc.c +++ b/daemons/attrd/attrd_ipc.c @@ -12,6 +12,7 @@ #include #include #include +#include // PRIu32 #include #include @@ -232,12 +233,13 @@ attrd_client_refresh(pcmk__request_t *request) static void handle_missing_host(xmlNode *xml) { - const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST); - - if (host == NULL) { - crm_trace("Inferring host"); - pcmk__xe_add_node(xml, attrd_cluster->priv->node_name, - attrd_cluster->priv->node_id); + if (crm_element_value(xml, PCMK__XA_ATTR_HOST) == NULL) { + crm_trace("Inferring local node %s with XML ID %s", + attrd_cluster->priv->node_name, + attrd_cluster->priv->node_xml_id); + crm_xml_add(xml, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name); + crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, + attrd_cluster->priv->node_xml_id); } } diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c index b6eebc66cb6..e1038a820b5 100644 --- a/daemons/attrd/attrd_messages.c +++ b/daemons/attrd/attrd_messages.c @@ -9,6 +9,7 @@ #include +#include // PRIu32 #include #include @@ -314,8 +315,9 @@ attrd_broadcast_protocol(void) crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL); crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION); crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1); - pcmk__xe_add_node(attrd_op, attrd_cluster->priv->node_name, - attrd_cluster->priv->node_id); + crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name); + crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST_ID, + attrd_cluster->priv->node_xml_id); crm_debug("Broadcasting attrd protocol version %s for node %s", ATTRD_PROTOCOL_VERSION, attrd_cluster->priv->node_name); diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c index f219b8862d3..3621f5f3543 100644 --- a/daemons/attrd/attrd_utils.c +++ b/daemons/attrd/attrd_utils.c @@ -232,6 +232,7 @@ attrd_free_attribute_value(gpointer data) free(v->nodename); free(v->current); free(v->requested); + free(v->node_xml_id); free(v); } diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index 13646b8e51d..07103a6b01c 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -99,7 +99,7 @@ extern crm_trigger_t *attrd_config_read; void attrd_lrmd_disconnect(void); gboolean attrd_read_options(gpointer user_data); -int attrd_send_attribute_alert(const char *node, int nodeid, +int attrd_send_attribute_alert(const char *node, const char *node_xml_id, const char *attr, const char *value); // Elections @@ -155,7 +155,7 @@ typedef struct attribute_value_s { char *nodename; // Node that this value is for char *current; // Attribute value char *requested; // Value specified in pending CIB write, if any - uint32_t nodeid; // Cluster node ID of node that this value is for + char *node_xml_id; // XML ID used for node in CIB uint32_t flags; // Group of attrd_value_flags } attribute_value_t; From aee2a5af668b6e15d9da6ecbbba7521acd1f0ea1 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 7 Feb 2024 16:05:50 -0600 Subject: [PATCH 10/16] Refactor: pacemaker-attrd: rename flag to match recent change --- daemons/attrd/attrd_cib.c | 15 +++++++++------ daemons/attrd/pacemaker-attrd.h | 16 ++++++++++++---- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 665af3625d8..e7eeaa96d9c 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -532,10 +532,13 @@ write_attribute(attribute_t *a, bool ignore_delay) } } - /* Attribute will be written shortly, so clear changed flag and force - * write flag, and initialize UUID missing flag to false. + /* The changed and force-write flags apply only to the next write, + * which this is, so clear them now. Also clear the "node unknown" flag + * because we will check whether it is known below and reset if appopriate. */ - attrd_clear_attr_flags(a, attrd_attr_changed|attrd_attr_uuid_missing|attrd_attr_force_write); + attrd_clear_attr_flags(a, attrd_attr_changed + |attrd_attr_force_write + |attrd_attr_node_unknown); /* Make the table for the attribute trap */ alert_attribute_value = pcmk__strikey_table(NULL, @@ -576,7 +579,7 @@ write_attribute(attribute_t *a, bool ignore_delay) // Defer write if this is a cluster node that's never been seen if (node_xml_id == NULL) { - attrd_set_attr_flags(a, attrd_attr_uuid_missing); + attrd_set_attr_flags(a, attrd_attr_node_unknown); crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID " "is unknown (will retry if learned)", a->id, v->nodename, v->current); @@ -668,8 +671,8 @@ attrd_write_attributes(uint32_t options) pcmk_is_set(options, attrd_write_all)? "all" : "changed"); g_hash_table_iter_init(&iter, attributes); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) { - if (!pcmk_is_set(options, attrd_write_all) && - pcmk_is_set(a->flags, attrd_attr_uuid_missing)) { + if (!pcmk_is_set(options, attrd_write_all) + && pcmk_is_set(a->flags, attrd_attr_node_unknown)) { // Try writing this attribute again, in case peer ID was learned attrd_set_attr_flags(a, attrd_attr_changed); } else if (pcmk_is_set(a->flags, attrd_attr_force_write)) { diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index 07103a6b01c..f0535eabaa0 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -115,10 +115,18 @@ void attrd_xml_add_writer(xmlNode *xml); enum attrd_attr_flags { attrd_attr_none = 0U, - attrd_attr_changed = (1U << 0), // Attribute value has changed since last write - attrd_attr_uuid_missing = (1U << 1), // Whether we know we're missing a peer UUID - attrd_attr_is_private = (1U << 2), // Whether to keep this attribute out of the CIB - attrd_attr_force_write = (1U << 3), // Update attribute by ignoring delay + + // At least one of attribute's values has changed since last write + attrd_attr_changed = (1U << 0), + + // At least one of attribute's values has an unknown node XML ID + attrd_attr_node_unknown = (1U << 1), + + // This attribute should never be written to the CIB + attrd_attr_is_private = (1U << 2), + + // Ignore any configured delay for next write of this attribute + attrd_attr_force_write = (1U << 3), }; typedef struct attribute_s { From a045a72a7ea122c10c4ceacb0cf15ff7ecd125c0 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Wed, 7 Feb 2024 16:13:44 -0600 Subject: [PATCH 11/16] Refactor: pacemaker-attrd: use variable for whether to write ... for readability and to reduce code duplication --- daemons/attrd/attrd_cib.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index e7eeaa96d9c..193b06739e0 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -492,13 +492,19 @@ write_attribute(attribute_t *a, bool ignore_delay) GHashTableIter iter; GHashTable *alert_attribute_value = NULL; int rc = pcmk_ok; + bool should_write = true; if (a == NULL) { return; } + // Private attributes (or any in standalone mode) are not written to the CIB + if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) { + should_write = false; + } + /* If this attribute will be written to the CIB ... */ - if (!stand_alone && !pcmk_is_set(a->flags, attrd_attr_is_private)) { + if (should_write) { /* Defer the write if now's not a good time */ if (a->update && (a->update < last_cib_op_done)) { crm_info("Write out of '%s' continuing: update %d considered lost", @@ -549,8 +555,7 @@ write_attribute(attribute_t *a, bool ignore_delay) while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { const char *node_xml_id = NULL; - // Private attributes (or any in standalone mode) are not written to CIB - if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) { + if (!should_write) { private_updates++; continue; } From 034c421a457b9dd5c654cb26292d9c05b1cd9244 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 24 Oct 2024 17:36:40 -0500 Subject: [PATCH 12/16] Low: pacemaker-attrd: track node XML IDs independent of attribute values Previously, node XML IDs were kept in attribute_value_t. That meant that they were duplicated for every value for a node, the values might be known for some values and unknown or inconsistent for others, and newly learned XML IDs would have to be broadcast per value. Now, maintain a global node XML ID cache. --- daemons/attrd/Makefile.am | 1 + daemons/attrd/attrd_attributes.c | 2 +- daemons/attrd/attrd_cib.c | 25 +++++----- daemons/attrd/attrd_corosync.c | 11 +++-- daemons/attrd/attrd_nodes.c | 82 ++++++++++++++++++++++++++++++++ daemons/attrd/attrd_utils.c | 1 - daemons/attrd/pacemaker-attrd.c | 2 + daemons/attrd/pacemaker-attrd.h | 6 +++ 8 files changed, 112 insertions(+), 18 deletions(-) create mode 100644 daemons/attrd/attrd_nodes.c diff --git a/daemons/attrd/Makefile.am b/daemons/attrd/Makefile.am index 47119679cf4..a2c8fd1477c 100644 --- a/daemons/attrd/Makefile.am +++ b/daemons/attrd/Makefile.am @@ -31,6 +31,7 @@ pacemaker_attrd_SOURCES = attrd_alerts.c \ attrd_elections.c \ attrd_ipc.c \ attrd_messages.c \ + attrd_nodes.c \ attrd_sync.c \ attrd_utils.c \ pacemaker-attrd.c diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c index 6d80acfce1c..fdc238375e7 100644 --- a/daemons/attrd/attrd_attributes.c +++ b/daemons/attrd/attrd_attributes.c @@ -150,7 +150,7 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a, * cluster IDs, we will have to drop support for rolling upgrades from * versions before those. */ - crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, v->node_xml_id); + crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_get_node_xml_id(v->nodename)); crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c index 193b06739e0..4231e4a668a 100644 --- a/daemons/attrd/attrd_cib.c +++ b/daemons/attrd/attrd_cib.c @@ -449,11 +449,14 @@ send_alert_attributes_value(attribute_t *a, GHashTable *t) g_hash_table_iter_init(&vIter, t); while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) { - rc = attrd_send_attribute_alert(at->nodename, at->node_xml_id, + const char *node_xml_id = attrd_get_node_xml_id(at->nodename); + + rc = attrd_send_attribute_alert(at->nodename, node_xml_id, a->id, at->current); crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s " "(%s agents failed)", - a->id, at->nodename, at->current, at->node_xml_id, + a->id, at->nodename, at->current, + pcmk__s(node_xml_id, "unknown"), ((rc == 0)? "no" : ((rc == -1)? "some" : "all"))); } } @@ -463,7 +466,6 @@ set_alert_attribute_value(GHashTable *t, attribute_value_t *v) { attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t)); - a_v->node_xml_id = pcmk__str_copy(v->node_xml_id); a_v->nodename = pcmk__str_copy(v->nodename); a_v->current = pcmk__str_copy(v->current); @@ -554,6 +556,7 @@ write_attribute(attribute_t *a, bool ignore_delay) g_hash_table_iter_init(&iter, a->values); while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { const char *node_xml_id = NULL; + const char *prev_xml_id = NULL; if (!should_write) { private_updates++; @@ -566,6 +569,8 @@ write_attribute(attribute_t *a, bool ignore_delay) * a fallback. */ + prev_xml_id = attrd_get_node_xml_id(v->nodename); + if (pcmk_is_set(v->flags, attrd_value_remote)) { // A Pacemaker Remote node's XML ID is the same as its name node_xml_id = v->nodename; @@ -573,12 +578,12 @@ write_attribute(attribute_t *a, bool ignore_delay) } else { // This creates a cluster node cache entry if none exists pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename, - v->node_xml_id, + prev_xml_id, pcmk__node_search_any); node_xml_id = pcmk__cluster_get_xml_id(peer); if (node_xml_id == NULL) { - node_xml_id = v->node_xml_id; + node_xml_id = prev_xml_id; } } @@ -591,15 +596,11 @@ write_attribute(attribute_t *a, bool ignore_delay) continue; } - /* Remember the XML ID and let peers know it (in case one of them - * becomes the writer later) - */ - if (!pcmk__str_eq(v->node_xml_id, node_xml_id, pcmk__str_none)) { + if (!pcmk__str_eq(prev_xml_id, node_xml_id, pcmk__str_none)) { crm_trace("Setting %s[%s] node XML ID to %s (was %s)", a->id, v->nodename, node_xml_id, - pcmk__s(v->node_xml_id, "unknown")); - pcmk__str_update(&(v->node_xml_id), node_xml_id); - attrd_broadcast_value(a, v); + pcmk__s(prev_xml_id, "unknown")); + attrd_set_node_xml_id(v->nodename, node_xml_id); } // Update this value as part of the CIB transaction we're building diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c index 02816b94d20..e97e09cb865 100644 --- a/daemons/attrd/attrd_corosync.c +++ b/daemons/attrd/attrd_corosync.c @@ -222,6 +222,7 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, int is_remote = 0; bool changed = false; attribute_value_t *v = NULL; + const char *prev_xml_id = NULL; const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID); // Create entry for value if not already existing @@ -236,8 +237,9 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, /* If update doesn't contain the node XML ID, fall back to any previously * known value (for logging) */ + prev_xml_id = attrd_get_node_xml_id(v->nodename); if (node_xml_id == NULL) { - node_xml_id = v->node_xml_id; + node_xml_id = prev_xml_id; } // If value is for a Pacemaker Remote node, remember that @@ -317,11 +319,11 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer, // Remember node's XML ID if we're just learning it if ((node_xml_id != NULL) - && !pcmk__str_eq(node_xml_id, v->node_xml_id, pcmk__str_none)) { + && !pcmk__str_eq(node_xml_id, prev_xml_id, pcmk__str_none)) { crm_trace("Learned %s[%s] node XML ID is %s (was %s)", a->id, v->nodename, node_xml_id, - pcmk__s(v->node_xml_id, "unknown")); - pcmk__str_update(&(v->node_xml_id), node_xml_id); + pcmk__s(prev_xml_id, "unknown")); + attrd_set_node_xml_id(v->nodename, node_xml_id); if (attrd_election_won()) { // In case we couldn't write a value missing the XML ID before attrd_write_attributes(attrd_write_changed); @@ -540,6 +542,7 @@ attrd_peer_remove(const char *host, bool uncache, const char *source) if (uncache) { pcmk__purge_node_from_cache(host, 0); + attrd_forget_node_xml_id(host); } } diff --git a/daemons/attrd/attrd_nodes.c b/daemons/attrd/attrd_nodes.c new file mode 100644 index 00000000000..8fb7797f2d9 --- /dev/null +++ b/daemons/attrd/attrd_nodes.c @@ -0,0 +1,82 @@ +/* + * Copyright 2024-2025 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include + +#include // NULL +#include // GHashTable, etc. + +#include "pacemaker-attrd.h" + +// Track the last known node XML ID for each node name +static GHashTable *node_xml_ids = NULL; + +/*! + * \internal + * \brief Get last known XML ID for a given node + * + * \param[in] node_name Name of node to check + * + * \return Last known XML ID for node (or NULL if none known) + * + * \note The return value may become invalid if attrd_set_node_xml_id() or + * attrd_forget_node_xml_id() is later called for \p node_name. + */ +const char * +attrd_get_node_xml_id(const char *node_name) +{ + if (node_xml_ids == NULL) { + return NULL; + } + return g_hash_table_lookup(node_xml_ids, node_name); +} + +/*! + * \internal + * \brief Set last known XML ID for a given node + * + * \param[in] node_name Name of node to set + * \param[in] node_xml_id New XML ID to set for node + */ +void +attrd_set_node_xml_id(const char *node_name, const char *node_xml_id) +{ + if (node_xml_ids == NULL) { + node_xml_ids = pcmk__strikey_table(free, free); + } + pcmk__insert_dup(node_xml_ids, node_name, node_xml_id); +} + +/*! + * \internal + * \brief Forget last known XML ID for a given node + * + * \param[in] node_name Name of node to forget + */ +void +attrd_forget_node_xml_id(const char *node_name) +{ + if (node_xml_ids == NULL) { + return; + } + g_hash_table_remove(node_xml_ids, node_name); +} + +/*! + * \internal + * \brief Free the node XML ID cache + */ +void +attrd_cleanup_xml_ids(void) +{ + if (node_xml_ids != NULL) { + g_hash_table_destroy(node_xml_ids); + node_xml_ids = NULL; + } +} diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c index 3621f5f3543..f219b8862d3 100644 --- a/daemons/attrd/attrd_utils.c +++ b/daemons/attrd/attrd_utils.c @@ -232,7 +232,6 @@ attrd_free_attribute_value(gpointer data) free(v->nodename); free(v->current); free(v->requested); - free(v->node_xml_id); free(v); } diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c index a5dac1272af..3c31bcd932e 100644 --- a/daemons/attrd/pacemaker-attrd.c +++ b/daemons/attrd/pacemaker-attrd.c @@ -207,6 +207,8 @@ main(int argc, char **argv) g_hash_table_destroy(attributes); } + attrd_cleanup_xml_ids(); + g_strfreev(processed_args); pcmk__free_arg_context(context); diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index f0535eabaa0..57d707c37cd 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -252,4 +252,10 @@ bool attrd_request_has_sync_point(xmlNode *xml); extern gboolean stand_alone; +// Node utilities (from attrd_nodes.c) +const char *attrd_get_node_xml_id(const char *node_name); +void attrd_set_node_xml_id(const char *node_name, const char *node_xml_id); +void attrd_forget_node_xml_id(const char *node_name); +void attrd_cleanup_xml_ids(void); + #endif /* PACEMAKER_ATTRD__H */ From 5025e575cfb6118b4fa4d55e80e6425de03f41d2 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 14 Jan 2025 09:24:15 -0600 Subject: [PATCH 13/16] Refactor: pacemaker-attrd: drop unused struct member --- daemons/attrd/pacemaker-attrd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h index 57d707c37cd..cc0dcf29ee4 100644 --- a/daemons/attrd/pacemaker-attrd.h +++ b/daemons/attrd/pacemaker-attrd.h @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024 the Pacemaker project contributors + * Copyright 2013-2025 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -163,7 +163,6 @@ typedef struct attribute_value_s { char *nodename; // Node that this value is for char *current; // Attribute value char *requested; // Value specified in pending CIB write, if any - char *node_xml_id; // XML ID used for node in CIB uint32_t flags; // Group of attrd_value_flags } attribute_value_t; From c709b083d2e798970de0d4df5758764203d105b9 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 14 Jan 2025 14:11:04 -0600 Subject: [PATCH 14/16] Low: libcrmcluster: better detect remote nodes in peer cache --- lib/cluster/membership.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c index e033f4e7545..04bcc396f75 100644 --- a/lib/cluster/membership.c +++ b/lib/cluster/membership.c @@ -158,7 +158,13 @@ pcmk__cluster_lookup_remote_node(const char *node_name) */ node = pcmk__search_node_caches(0, node_name, NULL, pcmk__node_search_cluster_member); - if ((node != NULL) && (node->xml_id == NULL)) { + if ((node != NULL) + && ((node->xml_id == NULL) + /* This assumes only Pacemaker Remote nodes have their XML ID the + * same as their node name + */ + || pcmk__str_eq(node->name, node->xml_id, pcmk__str_none))) { + /* node_name could be a pointer into the cache entry being removed, so * reassign it to a copy before the original gets freed */ From 200b0896455e243f2840b9849eb3a9230315c85f Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 14 Jan 2025 14:11:58 -0600 Subject: [PATCH 15/16] Refactor: controller: drop unused argument --- daemons/controld/controld_fencing.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index 7565b6c6c40..e5f03ef51c3 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -208,8 +208,7 @@ cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, } static void -send_stonith_update(pcmk__graph_action_t *action, const char *target, - const char *uuid) +send_stonith_update(const char *target, const char *uuid) { int rc = pcmk_ok; pcmk__node_status_t *peer = NULL; @@ -387,7 +386,7 @@ execute_stonith_cleanup(void) const char *uuid = pcmk__cluster_get_xml_id(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); - send_stonith_update(NULL, target, uuid); + send_stonith_update(target, uuid); free(target); } g_list_free(stonith_cleanup_list); @@ -602,7 +601,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) if (AM_I_DC) { /* The DC always sends updates */ - send_stonith_update(NULL, event->target, uuid); + send_stonith_update(event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. @@ -639,7 +638,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) * have them do so too after the election */ if (controld_is_local_node(event->executioner)) { - send_stonith_update(NULL, event->target, uuid); + send_stonith_update(event->target, uuid); } add_stonith_cleanup(event->target); } @@ -887,7 +886,7 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) is_remote_node); } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { - send_stonith_update(action, target, uuid); + send_stonith_update(target, uuid); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); } From 3c1145cc6b520cca5180fc91c8345e666b09ebce Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 14 Jan 2025 14:19:23 -0600 Subject: [PATCH 16/16] Refactor: controller: best practices for send_stonith_update() Add a doxygen block, rename function to update_node_state_after_fencing() and uuid argument to target_xml_id for readability, and improve log messages, comments, and formatting. --- daemons/controld/controld_fencing.c | 53 ++++++++++++----------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c index e5f03ef51c3..49d1142cb36 100644 --- a/daemons/controld/controld_fencing.c +++ b/daemons/controld/controld_fencing.c @@ -207,11 +207,19 @@ cib_fencing_updated(xmlNode *msg, int call_id, int rc, xmlNode *output, } } +/*! + * \internal + * \brief Update a fencing target's node state + * + * \param[in] target Node that was successfully fenced + * \param[in] target_xml_id CIB XML ID of target + */ static void -send_stonith_update(const char *target, const char *uuid) +update_node_state_after_fencing(const char *target, const char *target_xml_id) { int rc = pcmk_ok; pcmk__node_status_t *peer = NULL; + xmlNode *node_state = NULL; /* We (usually) rely on the membership layer to do node_update_cluster, * and the peer status callback to do node_update_peer, because the node @@ -219,18 +227,10 @@ send_stonith_update(const char *target, const char *uuid) */ int flags = node_update_join | node_update_expected; - /* zero out the node-status & remove all LRM status info */ - xmlNode *node_state = NULL; - - CRM_CHECK(target != NULL, return); - CRM_CHECK(uuid != NULL, return); - - /* Make sure the membership and join caches are accurate. - * Try getting any existing node cache entry also by node uuid in case it - * doesn't have an uname yet. - */ - peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any); + CRM_CHECK((target != NULL) && (target_xml_id != NULL), return); + // Ensure target is cached + peer = pcmk__get_node(0, target, target_xml_id, pcmk__node_search_any); CRM_CHECK(peer != NULL, return); if (peer->state == NULL) { @@ -242,16 +242,15 @@ send_stonith_update(const char *target, const char *uuid) } if (peer->xml_id == NULL) { - crm_info("Recording XML ID '%s' for node '%s'", uuid, target); - peer->xml_id = pcmk__str_copy(uuid); + crm_info("Recording XML ID '%s' for node '%s'", target_xml_id, target); + peer->xml_id = pcmk__str_copy(target_xml_id); } crmd_peer_down(peer, TRUE); - /* Generate a node state update for the CIB */ node_state = create_node_state_update(peer, flags, NULL, __func__); + crm_xml_add(node_state, PCMK_XA_ID, target_xml_id); - /* we have to mark whether or not remote nodes have already been fenced */ if (pcmk_is_set(peer->flags, pcmk__node_status_remote)) { char *now_s = pcmk__ttoa(time(NULL)); @@ -259,25 +258,15 @@ send_stonith_update(const char *target, const char *uuid) free(now_s); } - /* Force our known ID */ - crm_xml_add(node_state, PCMK_XA_ID, uuid); - rc = controld_globals.cib_conn->cmds->modify(controld_globals.cib_conn, PCMK_XE_STATUS, node_state, cib_can_create); + pcmk__xml_free(node_state); - /* Delay processing the trigger until the update completes */ - crm_debug("Sending fencing update %d for %s", rc, target); + crm_debug("Updating node state for %s after fencing (call %d)", target, rc); fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated); - // Make sure it sticks - /* controld_globals.cib_conn->cmds->bump_epoch(controld_globals.cib_conn, - * cib_none); - */ - controld_delete_node_state(peer->name, controld_section_all, cib_none); - pcmk__xml_free(node_state); - return; } /*! @@ -386,7 +375,7 @@ execute_stonith_cleanup(void) const char *uuid = pcmk__cluster_get_xml_id(target_node); crm_notice("Marking %s, target of a previous stonith action, as clean", target); - send_stonith_update(target, uuid); + update_node_state_after_fencing(target, uuid); free(target); } g_list_free(stonith_cleanup_list); @@ -601,7 +590,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) if (AM_I_DC) { /* The DC always sends updates */ - send_stonith_update(event->target, uuid); + update_node_state_after_fencing(event->target, uuid); /* @TODO Ideally, at this point, we'd check whether the fenced node * hosted any guest nodes, and call remote_node_down() for them. @@ -638,7 +627,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) * have them do so too after the election */ if (controld_is_local_node(event->executioner)) { - send_stonith_update(event->target, uuid); + update_node_state_after_fencing(event->target, uuid); } add_stonith_cleanup(event->target); } @@ -886,7 +875,7 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) is_remote_node); } else if (!(pcmk_is_set(action->flags, pcmk__graph_action_sent_update))) { - send_stonith_update(target, uuid); + update_node_state_after_fencing(target, uuid); pcmk__set_graph_action_flags(action, pcmk__graph_action_sent_update); }