From 63a54b0fc0d77f269caba984b83bf244c3bb80b3 Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Sat, 17 Aug 2024 03:41:46 +0200 Subject: [PATCH 1/3] not all children die equally --- docs/source/settings.rst | 4 ++++ gunicorn/arbiter.py | 13 ++++++++++--- gunicorn/config.py | 4 ++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/source/settings.rst b/docs/source/settings.rst index e1e91fa76..28df0ba30 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -1100,6 +1100,10 @@ A filename to use for the PID file. If not set, no PID file will be written. +.. note:: + During master re-exec, a ``.2`` suffix is added to + this path to store the PID of the newly launched master. + .. _worker-tmp-dir: ``worker_tmp_dir`` diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 1eaf453d5..04881a0a1 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -59,7 +59,9 @@ def __init__(self, app): self.pidfile = None self.systemd = False self.worker_age = 0 + # old master has != 0 until new master is dead or promoted self.reexec_pid = 0 + # new master has != 0 until old master is dead (until promotion) self.master_pid = 0 self.master_name = "Master" @@ -411,8 +413,10 @@ def reexec(self): master_pid = os.getpid() self.reexec_pid = os.fork() if self.reexec_pid != 0: + # old master return + # new master self.cfg.pre_exec(self) environ = self.cfg.env_orig.copy() @@ -517,7 +521,13 @@ def reap_workers(self): break if self.reexec_pid == wpid: self.reexec_pid = 0 + self.log.info("Master exited before promotion.") + continue else: + worker = self.WORKERS.pop(wpid, None) + if not worker: + self.log.debug("Non-worker subprocess (pid:%s) exited", wpid) + continue # A worker was terminated. If the termination reason was # that it could not boot, we'll shut it down to avoid # infinite start/stop cycles. @@ -552,9 +562,6 @@ def reap_workers(self): msg += " Perhaps out of memory?" 
self.log.error(msg) - worker = self.WORKERS.pop(wpid, None) - if not worker: - continue worker.tmp.close() self.cfg.child_exit(self, worker) except OSError as e: diff --git a/gunicorn/config.py b/gunicorn/config.py index 402a26b68..683dee7c8 100644 --- a/gunicorn/config.py +++ b/gunicorn/config.py @@ -1123,6 +1123,10 @@ class Pidfile(Setting): A filename to use for the PID file. If not set, no PID file will be written. + + .. note:: + During master re-exec, a ``.2`` suffix is added to + this path to store the PID of the newly launched master. """ From 41713117440f17dc9421488723cef1de65e1d7ed Mon Sep 17 00:00:00 2001 From: "Paul J. Dorn" Date: Fri, 23 Aug 2024 15:47:07 +0200 Subject: [PATCH 2/3] systemd: send MAINPID updates on re-exec --- gunicorn/arbiter.py | 21 +++++++++++++++++---- gunicorn/systemd.py | 8 ++++++++ tests/test_arbiter.py | 15 +++++++++------ 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index 04881a0a1..cc137b5b1 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -67,8 +67,11 @@ def __init__(self, app): cwd = util.getcwd() - args = sys.argv[:] - args.insert(0, sys.executable) + if sys.version_info < (3, 10): + args = sys.argv[:] + args.insert(0, sys.executable) + else: + args = sys.orig_argv[:] # init start context self.START_CTX = { @@ -159,7 +162,7 @@ def start(self): self.log.debug("Arbiter booted") self.log.info("Listening at: %s (%s)", listeners_str, self.pid) self.log.info("Using worker: %s", self.cfg.worker_class_str) - systemd.sd_notify("READY=1\nSTATUS=Gunicorn arbiter booted", self.log) + systemd.sd_notify("READY=1\nSTATUS=Gunicorn arbiter booted\n", self.log) # check worker class requirements if hasattr(self.worker_class, "check_config"): @@ -251,7 +254,10 @@ def handle_hup(self): - Gracefully shutdown the old worker processes """ self.log.info("Hang up: %s", self.master_name) + systemd.sd_notify("RELOADING=1\nSTATUS=Gunicorn arbiter reloading..\n", self.log) 
self.reload() + # possibly premature, newly launched workers might have failed + systemd.sd_notify("READY=1\nSTATUS=Gunicorn arbiter reloaded\n", self.log) def handle_term(self): "SIGTERM handling" @@ -327,6 +333,8 @@ def maybe_promote_master(self): self.pidfile.rename(self.cfg.pidfile) # reset proctitle util._setproctitle("master [%s]" % self.proc_name) + # MAINPID does not change here, it was already set on fork + systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter promoted\n" % (os.getpid(), ), self.log) def wakeup(self): """\ @@ -432,7 +440,10 @@ def reexec(self): os.chdir(self.START_CTX['cwd']) # exec the process using the original environment - os.execvpe(self.START_CTX[0], self.START_CTX['args'], environ) + self.log.debug("exe=%r argv=%r" % (self.START_CTX[0], self.START_CTX['args'])) + # let systemd know we are in control + systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter re-exec\n" % (master_pid, ), self.log) + os.execve(self.START_CTX[0], self.START_CTX['args'], environ) def reload(self): old_address = self.cfg.address @@ -522,6 +533,8 @@ def reap_workers(self): if self.reexec_pid == wpid: self.reexec_pid = 0 self.log.info("Master exited before promotion.") + # let systemd know we are (back) in control + systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter re-exec aborted\n" % (os.getpid(), ), self.log) continue else: worker = self.WORKERS.pop(wpid, None) diff --git a/gunicorn/systemd.py b/gunicorn/systemd.py index 9b1855060..1ce7f9d05 100644 --- a/gunicorn/systemd.py +++ b/gunicorn/systemd.py @@ -4,6 +4,7 @@ import os import socket +import time SD_LISTEN_FDS_START = 3 @@ -66,6 +67,13 @@ def sd_notify(state, logger, unset_environment=False): if addr[0] == '@': addr = '\0' + addr[1:] sock.connect(addr) + assert state.endswith("\n") + if "RELOADING" in state: # broad, but systemd man promises tolerating + # wrong clock on some platforms..
but this is only needed on Linux + # monotonic_ns() counts nsec (10**-9 s) + # systemd expects usec (10**-6 s), so divide + state += "MONOTONIC_USEC=%d\n" % (time.monotonic_ns() // 1_000, ) + logger.debug("sd_notify: %r" % (state, )) sock.sendall(state.encode('utf-8')) except Exception: logger.debug("Exception while invoking sd_notify()", exc_info=True) diff --git a/tests/test_arbiter.py b/tests/test_arbiter.py index 8c1527e26..320e929c8 100644 --- a/tests/test_arbiter.py +++ b/tests/test_arbiter.py @@ -71,24 +71,27 @@ def test_arbiter_stop_does_not_unlink_when_using_reuse_port(close_sockets): @mock.patch('os.getpid') @mock.patch('os.fork') -@mock.patch('os.execvpe') -def test_arbiter_reexec_passing_systemd_sockets(execvpe, fork, getpid): +@mock.patch('os.execve') +@mock.patch('gunicorn.systemd.sd_notify') +def test_arbiter_reexec_passing_systemd_sockets(sd_notify, execve, fork, getpid): arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) arbiter.LISTENERS = [mock.Mock(), mock.Mock()] arbiter.systemd = True fork.return_value = 0 + sd_notify.return_value = None getpid.side_effect = [2, 3] arbiter.reexec() - environ = execvpe.call_args[0][2] + environ = execve.call_args[0][2] assert environ['GUNICORN_PID'] == '2' assert environ['LISTEN_FDS'] == '2' assert environ['LISTEN_PID'] == '3' + sd_notify.assert_called_once() @mock.patch('os.getpid') @mock.patch('os.fork') -@mock.patch('os.execvpe') -def test_arbiter_reexec_passing_gunicorn_sockets(execvpe, fork, getpid): +@mock.patch('os.execve') +def test_arbiter_reexec_passing_gunicorn_sockets(execve, fork, getpid): arbiter = gunicorn.arbiter.Arbiter(DummyApplication()) listener1 = mock.Mock() listener2 = mock.Mock() @@ -98,7 +101,7 @@ def test_arbiter_reexec_passing_gunicorn_sockets(execvpe, fork, getpid): fork.return_value = 0 getpid.side_effect = [2, 3] arbiter.reexec() - environ = execvpe.call_args[0][2] + environ = execve.call_args[0][2] assert environ['GUNICORN_FD'] == '4,5' assert environ['GUNICORN_PID'] == '2' From ff883b468de2d9198dd161dfdee4c4afc9986a18 Mon Sep 17
00:00:00 2001 From: "Paul J. Dorn" Date: Sun, 8 Sep 2024 23:50:16 +0200 Subject: [PATCH 3/3] systemd: send STOPPING=1 on shutdown --- gunicorn/arbiter.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gunicorn/arbiter.py b/gunicorn/arbiter.py index cc137b5b1..49bebf0fe 100644 --- a/gunicorn/arbiter.py +++ b/gunicorn/arbiter.py @@ -348,6 +348,8 @@ def wakeup(self): def halt(self, reason=None, exit_status=0): """ halt arbiter """ + systemd.sd_notify("STOPPING=1\nSTATUS=Gunicorn shutting down..\n", self.log) + self.stop() log_func = self.log.info if exit_status == 0 else self.log.error