Skip to content

Commit

Permalink
Merge pull request #3796 from kgaillot/T137
Browse files Browse the repository at this point in the history
Track node's XML ID rather than cluster ID for attribute write-outs in pacemaker-attrd
  • Loading branch information
kgaillot authored Jan 14, 2025
2 parents 9931db7 + 3c1145c commit 7dc494a
Show file tree
Hide file tree
Showing 26 changed files with 330 additions and 160 deletions.
1 change: 1 addition & 0 deletions daemons/attrd/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pacemaker_attrd_SOURCES = attrd_alerts.c \
attrd_elections.c \
attrd_ipc.c \
attrd_messages.c \
attrd_nodes.c \
attrd_sync.c \
attrd_utils.c \
pacemaker-attrd.c
12 changes: 11 additions & 1 deletion daemons/attrd/attrd_alerts.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,22 @@ attrd_read_options(gpointer user_data)
}

int
attrd_send_attribute_alert(const char *node, int nodeid,
attrd_send_attribute_alert(const char *node, const char *node_xml_id,
const char *attr, const char *value)
{
uint32_t nodeid = 0U;
pcmk__node_status_t *node_status = NULL;

if (attrd_alert_list == NULL) {
return pcmk_ok;
}
node_status = pcmk__search_node_caches(0, node, node_xml_id,
pcmk__node_search_remote
|pcmk__node_search_cluster_member
|pcmk__node_search_cluster_cib);
if (node_status != NULL) {
nodeid = node_status->cluster_layer_id;
}
return lrmd_send_attribute_alert(attrd_lrmd_connect(), attrd_alert_list,
node, nodeid, attr, value);
}
16 changes: 12 additions & 4 deletions daemons/attrd/attrd_attributes.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,15 +142,23 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type);
crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
pcmk__xe_add_node(xml, v->nodename, v->nodeid);
if (pcmk_is_set(v->flags, attrd_value_remote)) {
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
}
crm_xml_add(xml, PCMK__XA_ATTR_HOST, v->nodename);

/* @COMPAT Prior to 2.1.10 and 3.0.1, the node's cluster ID was added
* instead of its XML ID. For Corosync and Pacemaker Remote nodes, those are
* the same, but if we ever support node XML IDs that differ from their
* cluster IDs, we will have to drop support for rolling upgrades from
* versions before those.
*/
crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID, attrd_get_node_xml_id(v->nodename));

crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING,
pcmk__timeout_ms2s(a->timeout_ms));
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE,
pcmk_is_set(a->flags, attrd_attr_is_private));
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE,
pcmk_is_set(v->flags, attrd_value_remote));
crm_xml_add_int(xml, PCMK__XA_ATTRD_IS_FORCE_WRITE, force_write);

return xml;
Expand Down
105 changes: 63 additions & 42 deletions daemons/attrd/attrd_cib.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include <crm_internal.h>

#include <errno.h>
#include <inttypes.h> // PRIu32
#include <stdbool.h>
#include <stdlib.h>
#include <glib.h>
Expand Down Expand Up @@ -450,10 +449,15 @@ send_alert_attributes_value(attribute_t *a, GHashTable *t)
g_hash_table_iter_init(&vIter, t);

while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & at)) {
rc = attrd_send_attribute_alert(at->nodename, at->nodeid,
const char *node_xml_id = attrd_get_node_xml_id(at->nodename);

rc = attrd_send_attribute_alert(at->nodename, node_xml_id,
a->id, at->current);
crm_trace("Sent alerts for %s[%s]=%s: nodeid=%d rc=%d",
a->id, at->nodename, at->current, at->nodeid, rc);
crm_trace("Sent alerts for %s[%s]=%s with node XML ID %s "
"(%s agents failed)",
a->id, at->nodename, at->current,
pcmk__s(node_xml_id, "unknown"),
((rc == 0)? "no" : ((rc == -1)? "some" : "all")));
}
}

Expand All @@ -462,7 +466,6 @@ set_alert_attribute_value(GHashTable *t, attribute_value_t *v)
{
attribute_value_t *a_v = pcmk__assert_alloc(1, sizeof(attribute_value_t));

a_v->nodeid = v->nodeid;
a_v->nodename = pcmk__str_copy(v->nodename);
a_v->current = pcmk__str_copy(v->current);

Expand Down Expand Up @@ -491,13 +494,19 @@ write_attribute(attribute_t *a, bool ignore_delay)
GHashTableIter iter;
GHashTable *alert_attribute_value = NULL;
int rc = pcmk_ok;
bool should_write = true;

if (a == NULL) {
return;
}

// Private attributes (or any in standalone mode) are not written to the CIB
if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
should_write = false;
}

/* If this attribute will be written to the CIB ... */
if (!stand_alone && !pcmk_is_set(a->flags, attrd_attr_is_private)) {
if (should_write) {
/* Defer the write if now's not a good time */
if (a->update && (a->update < last_cib_op_done)) {
crm_info("Write out of '%s' continuing: update %d considered lost",
Expand Down Expand Up @@ -531,10 +540,13 @@ write_attribute(attribute_t *a, bool ignore_delay)
}
}

/* Attribute will be written shortly, so clear changed flag and force
* write flag, and initialize UUID missing flag to false.
/* The changed and force-write flags apply only to the next write,
* which this is, so clear them now. Also clear the "node unknown" flag
* because we will check whether it is known below and reset if appopriate.
*/
attrd_clear_attr_flags(a, attrd_attr_changed|attrd_attr_uuid_missing|attrd_attr_force_write);
attrd_clear_attr_flags(a, attrd_attr_changed
|attrd_attr_force_write
|attrd_attr_node_unknown);

/* Make the table for the attribute trap */
alert_attribute_value = pcmk__strikey_table(NULL,
Expand All @@ -543,58 +555,67 @@ write_attribute(attribute_t *a, bool ignore_delay)
/* Iterate over each peer value of this attribute */
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) {
const char *uuid = NULL;
const char *node_xml_id = NULL;
const char *prev_xml_id = NULL;

if (!should_write) {
private_updates++;
continue;
}

/* We need the node's CIB XML ID to write out its attributes, so look
* for it now. Check the node caches first, even if the ID was
* previously known (in case it changed), but use any previous value as
* a fallback.
*/

prev_xml_id = attrd_get_node_xml_id(v->nodename);

if (pcmk_is_set(v->flags, attrd_value_remote)) {
/* If this is a Pacemaker Remote node, the node's UUID is the same
* as its name, which we already have.
*/
uuid = v->nodename;
// A Pacemaker Remote node's XML ID is the same as its name
node_xml_id = v->nodename;

} else {
// This will create a cluster node cache entry if none exists
pcmk__node_status_t *peer = pcmk__get_node(v->nodeid, v->nodename,
NULL,
// This creates a cluster node cache entry if none exists
pcmk__node_status_t *peer = pcmk__get_node(0, v->nodename,
prev_xml_id,
pcmk__node_search_any);

uuid = peer->xml_id;

// Remember peer's node ID if we're just now learning it
if ((peer->cluster_layer_id != 0) && (v->nodeid == 0)) {
crm_trace("Learned ID %" PRIu32 " for node %s",
peer->cluster_layer_id, v->nodename);
v->nodeid = peer->cluster_layer_id;
node_xml_id = pcmk__cluster_get_xml_id(peer);
if (node_xml_id == NULL) {
node_xml_id = prev_xml_id;
}
}

/* If this is a private attribute, no update needs to be sent */
if (stand_alone || pcmk_is_set(a->flags, attrd_attr_is_private)) {
private_updates++;
continue;
}

// Defer write if this is a cluster node that's never been seen
if (uuid == NULL) {
attrd_set_attr_flags(a, attrd_attr_uuid_missing);
crm_notice("Cannot update %s[%s]='%s' now because node's UUID is "
"unknown (will retry if learned)",
if (node_xml_id == NULL) {
attrd_set_attr_flags(a, attrd_attr_node_unknown);
crm_notice("Cannot write %s[%s]='%s' to CIB because node's XML ID "
"is unknown (will retry if learned)",
a->id, v->nodename, v->current);
continue;
}

if (!pcmk__str_eq(prev_xml_id, node_xml_id, pcmk__str_none)) {
crm_trace("Setting %s[%s] node XML ID to %s (was %s)",
a->id, v->nodename, node_xml_id,
pcmk__s(prev_xml_id, "unknown"));
attrd_set_node_xml_id(v->nodename, node_xml_id);
}

// Update this value as part of the CIB transaction we're building
rc = add_attr_update(a, v->current, uuid);
rc = add_attr_update(a, v->current, node_xml_id);
if (rc != pcmk_rc_ok) {
crm_err("Failed to update %s[%s]='%s': %s "
QB_XS " node uuid=%s id=%" PRIu32,
crm_err("Couldn't add %s[%s]='%s' to CIB transaction: %s "
QB_XS " node XML ID %s",
a->id, v->nodename, v->current, pcmk_rc_str(rc),
uuid, v->nodeid);
node_xml_id);
continue;
}

crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")",
crm_debug("Added %s[%s]=%s to CIB transaction (node XML ID %s)",
a->id, v->nodename, pcmk__s(v->current, "(unset)"),
uuid, v->nodeid);
node_xml_id);
cib_updates++;

/* Preservation of the attribute to transmit alert */
Expand Down Expand Up @@ -656,8 +677,8 @@ attrd_write_attributes(uint32_t options)
pcmk_is_set(options, attrd_write_all)? "all" : "changed");
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
if (!pcmk_is_set(options, attrd_write_all) &&
pcmk_is_set(a->flags, attrd_attr_uuid_missing)) {
if (!pcmk_is_set(options, attrd_write_all)
&& pcmk_is_set(a->flags, attrd_attr_node_unknown)) {
// Try writing this attribute again, in case peer ID was learned
attrd_set_attr_flags(a, attrd_attr_changed);
} else if (pcmk_is_set(a->flags, attrd_attr_force_write)) {
Expand Down
45 changes: 25 additions & 20 deletions daemons/attrd/attrd_corosync.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,19 +209,6 @@ attrd_peer_change_cb(enum pcmk__node_update kind, pcmk__node_status_t *peer,
}
}

static void
record_peer_nodeid(attribute_value_t *v, const char *host)
{
pcmk__node_status_t *known_peer =
pcmk__get_node(v->nodeid, host, NULL, pcmk__node_search_cluster_member);

crm_trace("Learned %s has node id %s",
known_peer->name, known_peer->xml_id);
if (attrd_election_won()) {
attrd_write_attributes(attrd_write_changed);
}
}

#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")

#define readable_peer(p) \
Expand All @@ -235,6 +222,8 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
int is_remote = 0;
bool changed = false;
attribute_value_t *v = NULL;
const char *prev_xml_id = NULL;
const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID);

// Create entry for value if not already existing
v = g_hash_table_lookup(a->values, host);
Expand All @@ -245,6 +234,14 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
g_hash_table_replace(a->values, v->nodename, v);
}

/* If update doesn't contain the node XML ID, fall back to any previously
* known value (for logging)
*/
prev_xml_id = attrd_get_node_xml_id(v->nodename);
if (node_xml_id == NULL) {
node_xml_id = prev_xml_id;
}

// If value is for a Pacemaker Remote node, remember that
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
if (is_remote) {
Expand All @@ -270,11 +267,12 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,

} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
QB_XS " from %s with %s write delay",
QB_XS " from %s with %s write delay and node XML ID %s",
attr, host, a->set_type ? " in " : "",
pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->name,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms),
pcmk__s(node_xml_id, "unknown"));
pcmk__str_update(&v->current, value);
attrd_set_attr_flags(a, attrd_attr_changed);

Expand Down Expand Up @@ -319,11 +317,17 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
// This allows us to later detect local values that peer doesn't know about
attrd_set_value_flags(v, attrd_value_from_peer);

/* If this is a cluster node whose node ID we are learning, remember it */
if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote)
&& (crm_element_value_int(xml, PCMK__XA_ATTR_HOST_ID,
(int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
record_peer_nodeid(v, host);
// Remember node's XML ID if we're just learning it
if ((node_xml_id != NULL)
&& !pcmk__str_eq(node_xml_id, prev_xml_id, pcmk__str_none)) {
crm_trace("Learned %s[%s] node XML ID is %s (was %s)",
a->id, v->nodename, node_xml_id,
pcmk__s(prev_xml_id, "unknown"));
attrd_set_node_xml_id(v->nodename, node_xml_id);
if (attrd_election_won()) {
// In case we couldn't write a value missing the XML ID before
attrd_write_attributes(attrd_write_changed);
}
}
}

Expand Down Expand Up @@ -538,6 +542,7 @@ attrd_peer_remove(const char *host, bool uncache, const char *source)

if (uncache) {
pcmk__purge_node_from_cache(host, 0);
attrd_forget_node_xml_id(host);
}
}

Expand Down
16 changes: 9 additions & 7 deletions daemons/attrd/attrd_ipc.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h> // PRIu32
#include <sys/types.h>

#include <crm/cluster.h>
Expand Down Expand Up @@ -155,7 +156,7 @@ attrd_client_peer_remove(pcmk__request_t *request)
pcmk__node_status_t *node = NULL;
char *host_alloc = NULL;

node = pcmk__search_node_caches(nodeid, NULL,
node = pcmk__search_node_caches(nodeid, NULL, NULL,
pcmk__node_search_cluster_member);
if ((node != NULL) && (node->name != NULL)) {
// Use cached name if available
Expand Down Expand Up @@ -232,12 +233,13 @@ attrd_client_refresh(pcmk__request_t *request)
static void
handle_missing_host(xmlNode *xml)
{
const char *host = crm_element_value(xml, PCMK__XA_ATTR_HOST);

if (host == NULL) {
crm_trace("Inferring host");
pcmk__xe_add_node(xml, attrd_cluster->priv->node_name,
attrd_cluster->priv->node_id);
if (crm_element_value(xml, PCMK__XA_ATTR_HOST) == NULL) {
crm_trace("Inferring local node %s with XML ID %s",
attrd_cluster->priv->node_name,
attrd_cluster->priv->node_xml_id);
crm_xml_add(xml, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name);
crm_xml_add(xml, PCMK__XA_ATTR_HOST_ID,
attrd_cluster->priv->node_xml_id);
}
}

Expand Down
6 changes: 4 additions & 2 deletions daemons/attrd/attrd_messages.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include <crm_internal.h>

#include <inttypes.h> // PRIu32
#include <glib.h>

#include <crm/common/messages_internal.h>
Expand Down Expand Up @@ -314,8 +315,9 @@ attrd_broadcast_protocol(void)
crm_xml_add(attrd_op, PCMK__XA_ATTR_NAME, CRM_ATTR_PROTOCOL);
crm_xml_add(attrd_op, PCMK__XA_ATTR_VALUE, ATTRD_PROTOCOL_VERSION);
crm_xml_add_int(attrd_op, PCMK__XA_ATTR_IS_PRIVATE, 1);
pcmk__xe_add_node(attrd_op, attrd_cluster->priv->node_name,
attrd_cluster->priv->node_id);
crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST, attrd_cluster->priv->node_name);
crm_xml_add(attrd_op, PCMK__XA_ATTR_HOST_ID,
attrd_cluster->priv->node_xml_id);

crm_debug("Broadcasting attrd protocol version %s for node %s",
ATTRD_PROTOCOL_VERSION, attrd_cluster->priv->node_name);
Expand Down
Loading

0 comments on commit 7dc494a

Please sign in to comment.