Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use original arguments on arbiter re-exec, and tell systemd so exiting old master does not terminate service #3285

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/source/settings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,10 @@ A filename to use for the PID file.

If not set, no PID file will be written.

.. note::
During master re-exec, a ``.2`` suffix is added to
this path to store the PID of the newly launched master.

.. _worker-tmp-dir:

``worker_tmp_dir``
Expand Down
34 changes: 27 additions & 7 deletions gunicorn/arbiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,19 @@ def __init__(self, app):
self.pidfile = None
self.systemd = False
self.worker_age = 0
# old master has != 0 until new master is dead or promoted
self.reexec_pid = 0
# new master has != 0 until old master is dead (until promotion)
self.master_pid = 0
self.master_name = "Master"

cwd = util.getcwd()

args = sys.argv[:]
args.insert(0, sys.executable)
if sys.version_info < (3, 10):
args = sys.argv[:]
args.insert(0, sys.executable)
else:
args = sys.orig_argv[:]

# init start context
self.START_CTX = {
Expand Down Expand Up @@ -157,7 +162,7 @@ def start(self):
self.log.debug("Arbiter booted")
self.log.info("Listening at: %s (%s)", listeners_str, self.pid)
self.log.info("Using worker: %s", self.cfg.worker_class_str)
systemd.sd_notify("READY=1\nSTATUS=Gunicorn arbiter booted", self.log)
systemd.sd_notify("READY=1\nSTATUS=Gunicorn arbiter booted\n", self.log)

# check worker class requirements
if hasattr(self.worker_class, "check_config"):
Expand Down Expand Up @@ -249,7 +254,10 @@ def handle_hup(self):
- Gracefully shutdown the old worker processes
"""
self.log.info("Hang up: %s", self.master_name)
systemd.sd_notify("RELOADING=1\nSTATUS=Gunicorn arbiter reloading..\n", self.log)
self.reload()
# possibly premature, newly launched workers might have failed
systemd.sd_notify("READY=1\nSTATUS=Gunicorn arbiter reloaded\n", self.log)

def handle_term(self):
"SIGTERM handling"
Expand Down Expand Up @@ -325,6 +333,8 @@ def maybe_promote_master(self):
self.pidfile.rename(self.cfg.pidfile)
# reset proctitle
util._setproctitle("master [%s]" % self.proc_name)
# MAINPID does not change here, it was already set on fork
systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter promoted\n" % (os.getpid(), ), self.log)

def wakeup(self):
"""\
Expand Down Expand Up @@ -411,8 +421,10 @@ def reexec(self):
master_pid = os.getpid()
self.reexec_pid = os.fork()
if self.reexec_pid != 0:
# old master
return

# new master
self.cfg.pre_exec(self)

environ = self.cfg.env_orig.copy()
Expand All @@ -428,7 +440,10 @@ def reexec(self):
os.chdir(self.START_CTX['cwd'])

# exec the process using the original environment
os.execvpe(self.START_CTX[0], self.START_CTX['args'], environ)
self.log.debug("exe=%r argv=%r" % (self.START_CTX[0], self.START_CTX['args']))
# let systemd know are are in control
systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter re-exec\n" % (master_pid, ), self.log)
os.execve(self.START_CTX[0], self.START_CTX['args'], environ)

def reload(self):
old_address = self.cfg.address
Expand Down Expand Up @@ -517,7 +532,15 @@ def reap_workers(self):
break
if self.reexec_pid == wpid:
self.reexec_pid = 0
self.log.info("Master exited before promotion.")
# let systemd know we are (back) in control
systemd.sd_notify("READY=1\nMAINPID=%d\nSTATUS=Gunicorn arbiter re-exec aborted\n" % (os.getpid(), ), self.log)
continue
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pylint hates useless indents. Deliberately not fixing this until #3148 is merged, otherwise merging is a pain.

else:
worker = self.WORKERS.pop(wpid, None)
if not worker:
self.log.debug("Non-worker subprocess (pid:%s) exited", wpid)
continue
# A worker was terminated. If the termination reason was
# that it could not boot, we'll shut it down to avoid
# infinite start/stop cycles.
Expand Down Expand Up @@ -552,9 +575,6 @@ def reap_workers(self):
msg += " Perhaps out of memory?"
self.log.error(msg)

worker = self.WORKERS.pop(wpid, None)
if not worker:
continue
worker.tmp.close()
self.cfg.child_exit(self, worker)
except OSError as e:
Expand Down
4 changes: 4 additions & 0 deletions gunicorn/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1123,6 +1123,10 @@ class Pidfile(Setting):
A filename to use for the PID file.

If not set, no PID file will be written.

.. note::
During master re-exec, a ``.2`` suffix is added to
this path to store the PID of the newly launched master.
"""


Expand Down
8 changes: 8 additions & 0 deletions gunicorn/systemd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import os
import socket
import time

SD_LISTEN_FDS_START = 3

Expand Down Expand Up @@ -66,6 +67,13 @@ def sd_notify(state, logger, unset_environment=False):
if addr[0] == '@':
addr = '\0' + addr[1:]
sock.connect(addr)
assert state.endswith("\n")
if "RELOADING" in state: # broad, but systemd man promises tolerating
# wrong clock on some platforms.. but this is only needed on Linux
# nsec = 10**-9
# usec = 10**-6
state += "MONOTONIC_USEC=%d\n" % (1_000*time.monotonic_ns(), )
logger.debug("sd_notify: %r" % (state, ))
sock.sendall(state.encode('utf-8'))
except Exception:
logger.debug("Exception while invoking sd_notify()", exc_info=True)
Expand Down
15 changes: 9 additions & 6 deletions tests/test_arbiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,24 +71,27 @@ def test_arbiter_stop_does_not_unlink_when_using_reuse_port(close_sockets):

@mock.patch('os.getpid')
@mock.patch('os.fork')
@mock.patch('os.execvpe')
def test_arbiter_reexec_passing_systemd_sockets(execvpe, fork, getpid):
@mock.patch('os.execve')
@mock.patch('gunicorn.systemd.sd_notify')
def test_arbiter_reexec_passing_systemd_sockets(sd_notify, execve, fork, getpid):
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
arbiter.LISTENERS = [mock.Mock(), mock.Mock()]
arbiter.systemd = True
fork.return_value = 0
sd_notify.return_value = None
getpid.side_effect = [2, 3]
arbiter.reexec()
environ = execvpe.call_args[0][2]
environ = execve.call_args[0][2]
assert environ['GUNICORN_PID'] == '2'
assert environ['LISTEN_FDS'] == '2'
assert environ['LISTEN_PID'] == '3'
sd_notify.assert_called_once()


@mock.patch('os.getpid')
@mock.patch('os.fork')
@mock.patch('os.execvpe')
def test_arbiter_reexec_passing_gunicorn_sockets(execvpe, fork, getpid):
@mock.patch('os.execve')
def test_arbiter_reexec_passing_gunicorn_sockets(execve, fork, getpid):
arbiter = gunicorn.arbiter.Arbiter(DummyApplication())
listener1 = mock.Mock()
listener2 = mock.Mock()
Expand All @@ -98,7 +101,7 @@ def test_arbiter_reexec_passing_gunicorn_sockets(execvpe, fork, getpid):
fork.return_value = 0
getpid.side_effect = [2, 3]
arbiter.reexec()
environ = execvpe.call_args[0][2]
environ = execve.call_args[0][2]
assert environ['GUNICORN_FD'] == '4,5'
assert environ['GUNICORN_PID'] == '2'

Expand Down
Loading