Skip to content

Commit

Permalink
systemd: Backport patches that allow netlink timeout to be configured
Browse files Browse the repository at this point in the history
Backport two patches that make the default timeout for waiting on replies
to netlink messages from the kernel configurable.  The default timeout is
25 seconds, and it is possible to hit this timeout if the system is under
load.  If the timeout is hit, network links can become unusable without
intervention.

Reference issue: github.com/systemd/systemd/issues/25441
  • Loading branch information
zmrow committed Oct 11, 2023
1 parent f51b371 commit 2cbd63d
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
From 926bf2c26d6e69e2e31a74c9ec50f882c3af8d79 Mon Sep 17 00:00:00 2001
From: Yu Watanabe <[email protected]>
Date: Sun, 1 Oct 2023 12:04:52 +0900
Subject: [PATCH] sd-netlink: make calc_elapse() return USEC_INFINITY when no
timeout is requested

Then, timeout_compare() becomes simpler, and the timeout value becomes
consistent with what sd_netlink_get_timeout() provides.

This also drops the unnecessary assignment of reply_callback.timeout after
the slot is dropped from the prioq.
---
src/libsystemd/sd-netlink/netlink-slot.c | 2 +-
src/libsystemd/sd-netlink/sd-netlink.c | 24 ++++++------------------
2 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/src/libsystemd/sd-netlink/netlink-slot.c b/src/libsystemd/sd-netlink/netlink-slot.c
index 34f527d07f..b2525ff9cf 100644
--- a/src/libsystemd/sd-netlink/netlink-slot.c
+++ b/src/libsystemd/sd-netlink/netlink-slot.c
@@ -63,7 +63,7 @@ void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref) {
case NETLINK_REPLY_CALLBACK:
(void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);

- if (slot->reply_callback.timeout != 0)
+ if (slot->reply_callback.timeout != USEC_INFINITY)
prioq_remove(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);

break;
diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c
index 74f297243a..01a04eb308 100644
--- a/src/libsystemd/sd-netlink/sd-netlink.c
+++ b/src/libsystemd/sd-netlink/sd-netlink.c
@@ -254,7 +254,6 @@ static int process_timeout(sd_netlink *nl) {
return r;

assert_se(prioq_pop(nl->reply_callbacks_prioq) == c);
- c->timeout = 0;
hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(c->serial));

slot = container_of(c, sd_netlink_slot, reply_callback);
@@ -287,10 +286,8 @@ static int process_reply(sd_netlink *nl, sd_netlink_message *m) {
if (!c)
return 0;

- if (c->timeout != 0) {
+ if (c->timeout != USEC_INFINITY)
prioq_remove(nl->reply_callbacks_prioq, c, &c->prioq_idx);
- c->timeout = 0;
- }

r = sd_netlink_message_get_type(m, &type);
if (r < 0)
@@ -419,10 +416,7 @@ int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) {
return r;
}

-static usec_t calc_elapse(uint64_t usec) {
- if (usec == UINT64_MAX)
- return 0;
-
+static usec_t timespan_to_timestamp(usec_t usec) {
if (usec == 0)
usec = NETLINK_DEFAULT_TIMEOUT_USEC;

@@ -476,12 +470,6 @@ int sd_netlink_wait(sd_netlink *nl, uint64_t timeout_usec) {
static int timeout_compare(const void *a, const void *b) {
const struct reply_callback *x = a, *y = b;

- if (x->timeout != 0 && y->timeout == 0)
- return -1;
-
- if (x->timeout == 0 && y->timeout != 0)
- return 1;
-
return CMP(x->timeout, y->timeout);
}

@@ -521,7 +509,7 @@ int sd_netlink_call_async(
return r;

slot->reply_callback.callback = callback;
- slot->reply_callback.timeout = calc_elapse(usec);
+ slot->reply_callback.timeout = timespan_to_timestamp(usec);

k = sd_netlink_send(nl, m, &slot->reply_callback.serial);
if (k < 0)
@@ -531,7 +519,7 @@ int sd_netlink_call_async(
if (r < 0)
return r;

- if (slot->reply_callback.timeout != 0) {
+ if (slot->reply_callback.timeout != USEC_INFINITY) {
r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
if (r < 0) {
(void) hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial));
@@ -562,7 +550,7 @@ int sd_netlink_read(
assert_return(nl, -EINVAL);
assert_return(!netlink_pid_changed(nl), -ECHILD);

- timeout = calc_elapse(usec);
+ timeout = timespan_to_timestamp(usec);

for (;;) {
usec_t left;
@@ -609,7 +597,7 @@ int sd_netlink_read(
/* received message, so try to process straight away */
continue;

- if (timeout > 0) {
+ if (timeout != USEC_INFINITY) {
usec_t n;

n = now(CLOCK_MONOTONIC);
--
2.38.1

Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
From 5e518008b9015ced364e92648ad17f51e95442bc Mon Sep 17 00:00:00 2001
From: Yu Watanabe <[email protected]>
Date: Sun, 1 Oct 2023 12:04:59 +0900
Subject: [PATCH] sd-netlink: make the default timeout configurable by
environment variable

On normal systems, triggering a timeout should be a bug in code or
configuration error, so I do not think we should extend the default
timeout. Also, we should not introduce a 'first class' configuration
option for that. However, making it configurable may be useful in cases
such as "an extremely highly utilized system (lots of OOM kills,
very high CPU utilization, etc)".

Closes #25441.
---
docs/ENVIRONMENT.md | 3 +++
src/libsystemd/sd-netlink/sd-netlink.c | 23 +++++++++++++++++++++--
2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/docs/ENVIRONMENT.md b/docs/ENVIRONMENT.md
index 1e7a75a36c..81b3c36d58 100644
--- a/docs/ENVIRONMENT.md
+++ b/docs/ENVIRONMENT.md
@@ -108,6 +108,9 @@ All tools:
for example in `systemd-nspawn`, will be logged to the audit log, if the
kernel supports this.

+* `$SYSTEMD_NETLINK_DEFAULT_TIMEOUT` — specifies the default timeout for waiting
+ for replies to netlink messages from the kernel. Defaults to 25 seconds.
+
`systemctl`:

* `$SYSTEMCTL_FORCE_BUS=1` — if set, do not connect to PID 1's private D-Bus
diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c
index 01a04eb308..dd74a1aa78 100644
--- a/src/libsystemd/sd-netlink/sd-netlink.c
+++ b/src/libsystemd/sd-netlink/sd-netlink.c
@@ -417,8 +417,27 @@ int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) {
}

static usec_t timespan_to_timestamp(usec_t usec) {
- if (usec == 0)
- usec = NETLINK_DEFAULT_TIMEOUT_USEC;
+ static bool default_timeout_set = false;
+ static usec_t default_timeout;
+ int r;
+
+ if (usec == 0) {
+ if (!default_timeout_set) {
+ const char *e;
+
+ default_timeout_set = true;
+ default_timeout = NETLINK_DEFAULT_TIMEOUT_USEC;
+
+ e = getenv("SYSTEMD_NETLINK_DEFAULT_TIMEOUT");
+ if (e) {
+ r = parse_sec(e, &default_timeout);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: Failed to parse $SYSTEMD_NETLINK_DEFAULT_TIMEOUT environment variable, ignoring: %m");
+ }
+ }
+
+ usec = default_timeout;
+ }

return usec_add(now(CLOCK_MONOTONIC), usec);
}
--
2.38.1

9 changes: 9 additions & 0 deletions packages/systemd/systemd.spec
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ Source3: journald.conf
Source4: issue
Source5: systemd-journald.conf

# Backport of upstream patches that make the netlink default timeout
# configurable. Bottlerocket carries these patches and configures the timeout
# in an effort to avoid a situation where a network link becomes unusable if
# the system is under load and doesn't process the RTM_NEWROUTE acknowledgement
# within the default timeout of 25 seconds.
# Reference issue: github.com/systemd/systemd/issues/25441
Patch1001: 1001-sd-netlink-make-calc_elapse-return-USEC_INFINITY-whe.patch
Patch1002: 1002-sd-netlink-make-the-default-timeout-configurable-by-.patch

# Local patch to work around the fact that /var is a bind mount from
# /local/var, and we want the /local/var/run symlink to point to /run.
Patch9001: 9001-use-absolute-path-for-var-run-symlink.patch
Expand Down

0 comments on commit 2cbd63d

Please sign in to comment.