From 793385e0a4329bab9c44f479b196bc69e753927a Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Fri, 26 Apr 2024 14:12:50 -0400 Subject: [PATCH] freebsd-relayd: Fix problems with HCE/PFE/relay desync after reload It has been observed that relayd will sometimes exit with an assertion failure immediately after its config has been reloaded (i.e., SIGHUP was sent or "relayctl reload" was used). Usually it's because the PFE was handling a host status update (IMSG_HOST_STATUS) and failed to find the host in question or the check counts aren't matching. relayd configuration reloads are implemented by the parent, which re-reads the configuration file, builds up a new set of data structures (hosts, tables, redirects, etc.), and sends copies to each worker. The sequence is: 1. parent -> IMSG_CTL_RESET -> workers, causing workers to free all of the existing data structures. 2. parent -> IMSG_CFG_{TABLE,HOST,...} -> workers, giving state to workers. 3. parent -> IMSG_CFG_DONE -> workers, indicating that it's done with the configuration update. 4. workers -> IMSG_CFG_DONE -> parent, indicating workers are ready to start. 5. parent -> IMSG_CFG_START -> workers, only once all messages in step 4 have been received by the parent. The structures sent in step 2 each have a unique ID (modulo 32-bit object ID rollover), so a host object ID is only valid in one "configuration epoch". When a reload starts, a new configuration epoch begins, and all IDs belonging to the previous epoch become invalid. The basic problem is that there is no global ordering of IPC messages sent between workers and the parent. Each process in relayd receives messages from multiple queues, and these can be interleaved or delayed indefinitely. For instance, IMSG_HOST_STATUS (sent from HCE -> PFE, and PFE -> relay) refers to a host by ID, but there is nothing ensuring that the status update is handled in the same epoch in which it was sent. Such mismatches result in the assertion failures mentioned above. One solution is simply to remove checks and drop messages if they refer to non-existent objects. However, this might hide bugs and generally makes the code harder to reason about. My solution is to make the notion of configuration epoch explicit in relayd worker state, and to tag certain messages with the current epoch. Then workers can decide what to do about mismatches. So: 1. Add a 64-bit epoch counter, incremented in step 1 of the reload procedure above. That is, each worker has a notion of the current epoch, incremented when IMSG_CTL_RESET is received. 2. When the HCE schedules host checks, it stamps the host with the current epoch. When IMSG_HOST_STATUS is set to the PFE and to relays, the receiver drops the message if the epoch doesn't match. This ensures that old host status messages are correctly ignored. 3. "script" checks involve a round-trip through the parent process which isn't cancelled when the HCE receives IMSG_CTL_RESET. Thus, imbue script state with the current epoch as well. 4. Ensure that the PFE and HCE actually disable events when CTL_RESET is received. Previously, hce_disable_events() and pfe_disable_events() weren't getting called at all. 5. Make sure that relayctl can't be used to schedule a check while a configuration reload is in progress. This was sufficient to eliminate assertion failures in a stress test which spams relayd with SIGHUP and relayctl poll commands. I suspect that my coverage of item 5 above isn't wide enough; that is, other relayctl commands (e.g., relayctl table disable) might still cause problems. --- usr.sbin/relayd/check_script.c | 7 +++++++ usr.sbin/relayd/config.c | 4 ++++ usr.sbin/relayd/hce.c | 23 +++++++++++++++-------- usr.sbin/relayd/pfe.c | 14 ++++++++++---- usr.sbin/relayd/relay.c | 2 ++ usr.sbin/relayd/relayd.h | 7 ++++++- 6 files changed, 44 insertions(+), 13 deletions(-) diff --git a/usr.sbin/relayd/check_script.c b/usr.sbin/relayd/check_script.c index d0cd1f6fc7ff..e162a0942ee2 100644 --- a/usr.sbin/relayd/check_script.c +++ b/usr.sbin/relayd/check_script.c @@ -48,6 +48,7 @@ check_script(struct relayd *env, struct host *host) host->flags &= ~(F_CHECK_SENT|F_CHECK_DONE); scr.host = host->conf.id; + scr.config_gen = env->sc_config_gen; if ((strlcpy(scr.name, host->conf.name,sizeof(scr.name)) >= sizeof(scr.name)) || (strlcpy(scr.path, table->conf.path, sizeof(scr.path)) >= @@ -65,6 +66,12 @@ script_done(struct relayd *env, struct ctl_script *scr) { struct host *host; + if (scr->config_gen != env->sc_config_gen) { + log_debug("script check for %s interrupted, ignoring", + scr->name); + return; + } + if ((host = host_find(env, scr->host)) == NULL) fatalx("%s: invalid host id", __func__); diff --git a/usr.sbin/relayd/config.c b/usr.sbin/relayd/config.c index 48c35c0b1056..a6a626d70421 100644 --- a/usr.sbin/relayd/config.c +++ b/usr.sbin/relayd/config.c @@ -153,6 +153,8 @@ config_purge(struct relayd *env, u_int reset) struct keyname *keyname; u_int what; + env->sc_started = 0; + what = ps->ps_what[privsep_process] & reset; if (what & CONFIG_TABLES && env->sc_tables != NULL) { @@ -265,6 +267,8 @@ config_getreset(struct relayd *env, struct imsg *imsg) config_purge(env, mode); + env->sc_config_gen++; + return (0); } diff --git a/usr.sbin/relayd/hce.c b/usr.sbin/relayd/hce.c index a1cb0683bfed..a667ef9ea6d2 100644 --- a/usr.sbin/relayd/hce.c +++ b/usr.sbin/relayd/hce.c @@ -82,11 +82,10 @@ hce_setup_events(void) struct timeval tv; struct table *table; - if (!event_initialized(&env->sc_ev)) { + if (!event_initialized(&env->sc_ev)) evtimer_set(&env->sc_ev, hce_launch_checks, env); - bzero(&tv, sizeof(tv)); - evtimer_add(&env->sc_ev, &tv); - } + bzero(&tv, sizeof(tv)); + evtimer_add(&env->sc_ev, &tv); if (env->sc_conf.flags & F_TLS) { TAILQ_FOREACH(table, env->sc_tables, entry) { @@ -100,6 +99,8 @@ hce_setup_events(void) tls_config_insecure_noverifyname(table->tls_cfg); } } + + env->sc_started = 1; } void @@ -170,6 +171,7 @@ hce_launch_checks(int fd, short event, void *arg) continue; bcopy(&tv, &host->cte.tv_start, sizeof(host->cte.tv_start)); + host->config_gen = env->sc_config_gen; switch (table->conf.check) { case CHECK_ICMP: schedule_icmp(env, host); @@ -257,6 +259,7 @@ hce_notify_done(struct host *host, enum host_error he) st.up = host->up; st.check_cnt = host->check_cnt; st.retry_cnt = host->retry_cnt; + st.config_gen = host->config_gen; st.he = he; host->flags |= (F_CHECK_SENT|F_CHECK_DONE); msg = host_error(he); @@ -305,6 +308,7 @@ hce_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg) objid_t id; struct host *host; struct table *table; + u_int64_t config_gen; switch (imsg->hdr.type) { case IMSG_HOST_DISABLE: @@ -342,10 +346,12 @@ hce_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg) host->up = HOST_UNKNOWN; break; case IMSG_CTL_POLL: - evtimer_del(&env->sc_ev); - TAILQ_FOREACH(table, env->sc_tables, entry) - table->skipped = 0; - hce_launch_checks(-1, EV_TIMEOUT, env); + if (env->sc_started) { + evtimer_del(&env->sc_ev); + TAILQ_FOREACH(table, env->sc_tables, entry) + table->skipped = 0; + hce_launch_checks(-1, EV_TIMEOUT, env); + } break; default: return (-1); @@ -378,6 +384,7 @@ hce_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg) hce_setup_events(); break; case IMSG_CTL_RESET: + hce_disable_events(); config_getreset(env, imsg); break; default: diff --git a/usr.sbin/relayd/pfe.c b/usr.sbin/relayd/pfe.c index d17f819ccde4..7b848c4e56dd 100644 --- a/usr.sbin/relayd/pfe.c +++ b/usr.sbin/relayd/pfe.c @@ -125,11 +125,12 @@ pfe_setup_events(void) struct timeval tv; /* Schedule statistics timer */ - if (!event_initialized(&env->sc_statev)) { + if (!event_initialized(&env->sc_statev)) evtimer_set(&env->sc_statev, pfe_statistics, NULL); - bcopy(&env->sc_conf.statinterval, &tv, sizeof(tv)); - evtimer_add(&env->sc_statev, &tv); - } + bcopy(&env->sc_conf.statinterval, &tv, sizeof(tv)); + evtimer_add(&env->sc_statev, &tv); + + env->sc_started = 1; } void @@ -151,6 +152,10 @@ pfe_dispatch_hce(int fd, struct privsep_proc *p, struct imsg *imsg) case IMSG_HOST_STATUS: IMSG_SIZE_CHECK(imsg, &st); memcpy(&st, imsg->data, sizeof(st)); + if (st.config_gen != env->sc_config_gen) { + log_debug("%s: reload gen mismatch", __func__); + break; + } if ((host = host_find(env, st.id)) == NULL) fatalx("%s: invalid host id", __func__); host->he = st.he; @@ -286,6 +291,7 @@ pfe_dispatch_relay(int fd, struct privsep_proc *p, struct imsg *imsg) struct relay *rlay; struct ctl_conn *c; struct rsession con, *s, *t; + u_int64_t config_gen; int cid; objid_t sid; diff --git a/usr.sbin/relayd/relay.c b/usr.sbin/relayd/relay.c index de94a2d9c3de..d51428369809 100644 --- a/usr.sbin/relayd/relay.c +++ b/usr.sbin/relayd/relay.c @@ -1976,6 +1976,8 @@ relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg) case IMSG_HOST_STATUS: IMSG_SIZE_CHECK(imsg, &st); memcpy(&st, imsg->data, sizeof(st)); + if (st.config_gen != env->sc_config_gen) + break; if ((host = host_find(env, st.id)) == NULL) fatalx("%s: invalid host id", __func__); if (host->flags & F_DISABLE) diff --git a/usr.sbin/relayd/relayd.h b/usr.sbin/relayd/relayd.h index 40e31aebd869..6544c5282fce 100644 --- a/usr.sbin/relayd/relayd.h +++ b/usr.sbin/relayd/relayd.h @@ -162,6 +162,7 @@ struct ctl_status { int up; int retry_cnt; u_long check_cnt; + u_int64_t config_gen; u_int16_t he; }; @@ -193,6 +194,7 @@ struct ctl_relayfd { struct ctl_script { objid_t host; + u_int64_t config_gen; int retval; struct timeval timeout; char name[HOST_NAME_MAX+1]; @@ -481,6 +483,7 @@ struct host { u_long up_cnt; int retry_cnt; int idx; + u_int64_t config_gen; u_int16_t he; int code; struct ctl_tcp_event cte; @@ -1059,7 +1062,7 @@ enum imsg_type { IMSG_CA_PRIVDEC, IMSG_SESS_PUBLISH, /* from relay to pfe */ IMSG_SESS_UNPUBLISH, - IMSG_TLSTICKET_REKEY + IMSG_TLSTICKET_REKEY, }; enum privsep_procid { @@ -1206,6 +1209,8 @@ struct relayd { struct privsep *sc_ps; int sc_reload; + int sc_started; + u_int64_t sc_config_gen; }; #define RELAYD_OPT_VERBOSE 0x01