diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c index b6c5512..7d61bd3 100644 --- a/src/sbd-cluster.c +++ b/src/sbd-cluster.c @@ -58,7 +58,16 @@ static crm_cluster_t cluster; static gboolean sbd_remote_check(gpointer user_data); static long unsigned int find_pacemaker_remote(void); static void sbd_membership_destroy(gpointer user_data); +static bool wait_for_pacemaker_remote_lost = false; +static void signal_exitreq(void) +{ + union sigval signal_value; + pid_t ppid = getppid(); + + memset(&signal_value, 0, sizeof(signal_value)); + sigqueue(ppid, SIG_EXITREQ, signal_value); +} #if SUPPORT_PLUGIN static void @@ -675,6 +684,10 @@ sbd_remote_check(gpointer user_data) set_servant_health(pcmk_health_online, LOG_INFO, "Connected to Pacemaker Remote %lu", (long unsigned int)remoted_pid); } else { + if (wait_for_pacemaker_remote_lost) { + signal_exitreq(); + return true; + } set_servant_health(pcmk_health_unclean, LOG_WARNING, "Connection to Pacemaker Remote %lu lost", (long unsigned int)remoted_pid); } @@ -742,6 +755,16 @@ cluster_shutdown(int nsig) clean_up(0); } +static void +trigger_wait_for_pacemaker_remote_lost(int nsig) +{ + /* if we've never seen pacemaker_remoted, request exit immediately */ + if ((remoted_pid <= 0) || !remote_node) { + signal_exitreq(); + } + wait_for_pacemaker_remote_lost = true; +} + int servant_cluster(const char *diskname, int mode, const void* argp) { @@ -761,6 +784,7 @@ servant_cluster(const char *diskname, int mode, const void* argp) mainloop_add_signal(SIGTERM, cluster_shutdown); mainloop_add_signal(SIGINT, cluster_shutdown); + mainloop_add_signal(SIGUSR2, trigger_wait_for_pacemaker_remote_lost); g_main_loop_run(mainloop); g_main_loop_unref(mainloop); diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c index d917cd1..6e56120 100644 --- a/src/sbd-inquisitor.c +++ b/src/sbd-inquisitor.c @@ -248,14 +248,14 @@ void servants_start(void) } } -void servants_kill(void) +void servants_kill(int sig) { struct servants_list_item *s; union
sigval svalue; for (s = servants_leader; s; s = s->next) { if (s->pid != 0) - sigqueue(s->pid, SIGKILL, svalue); + sigqueue(s->pid, sig, svalue); } } @@ -536,7 +536,7 @@ void inquisitor_child(void) clock_gettime(CLOCK_MONOTONIC, &t_now); if (sig == SIG_EXITREQ || sig == SIGTERM) { - servants_kill(); + servants_kill(SIGKILL); watchdog_close(true); exiting = 1; } else if (sig == SIGCHLD) { @@ -610,6 +610,8 @@ void inquisitor_child(void) if (exiting) continue; servants_start(); + } else if (sig == SIGUSR2) { + servants_kill(SIGUSR2); } if (exiting) { @@ -718,7 +720,7 @@ void inquisitor_child(void) */ cl_log(LOG_DEBUG, "Decoupling"); if (inquisitor_decouple() < 0) { - servants_kill(); + servants_kill(SIGKILL); exiting = 1; continue; } else { @@ -734,7 +736,7 @@ void inquisitor_child(void) /* We're still being watched by our * parent. We don't fence, but exit. */ cl_log(LOG_ERR, "SBD: Not enough votes to proceed. Aborting start-up."); - servants_kill(); + servants_kill(SIGKILL); exiting = 1; continue; } diff --git a/src/sbd_remote.service.in b/src/sbd_remote.service.in index 56675b1..39cfea4 100644 --- a/src/sbd_remote.service.in +++ b/src/sbd_remote.service.in @@ -11,7 +11,7 @@ Type=forking PIDFile=@runstatedir@/sbd.pid EnvironmentFile=-@CONFIGDIR@/sbd ExecStart=@sbindir@/sbd $SBD_OPTS -p @runstatedir@/sbd.pid watch -ExecStop=@bindir@/kill -TERM $MAINPID +ExecStop=@bindir@/kill -USR2 $MAINPID # Could this benefit from exit codes for restart? # Does this need to be set to msgwait * 1.2?