Skip to content

Commit

Permalink
Merge pull request bottlerocket-os#3520 from zmrow/netlink-timeout
Browse files Browse the repository at this point in the history
Backport systemd patches and configure netlink timeout
  • Loading branch information
zmrow authored Oct 12, 2023
2 parents 158b159 + 0ac35ed commit c3e7a9e
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 0 deletions.
7 changes: 7 additions & 0 deletions packages/release/release.spec
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ Source1080: runtime.slice
# Drop-in units to override defaults
Source1100: systemd-tmpfiles-setup-service-debug.conf
Source1101: systemd-resolved-service-env.conf
Source1102: systemd-networkd-service-env.conf

# systemd-udevd default link
Source1200: 80-release.link
Expand Down Expand Up @@ -167,6 +168,10 @@ install -d %{buildroot}%{_cross_unitdir}/systemd-resolved.service.d
install -p -m 0644 %{S:1101} \
%{buildroot}%{_cross_unitdir}/systemd-resolved.service.d/00-env.conf

install -d %{buildroot}%{_cross_unitdir}/systemd-networkd.service.d
install -p -m 0644 %{S:1102} \
%{buildroot}%{_cross_unitdir}/systemd-networkd.service.d/00-env.conf

LOWERPATH=$(systemd-escape --path %{_cross_sharedstatedir}/kernel-devel/.overlay/lower)
sed -e 's|PREFIX|%{_cross_prefix}|' %{S:1020} > ${LOWERPATH}.mount
install -p -m 0644 ${LOWERPATH}.mount %{buildroot}%{_cross_unitdir}
Expand Down Expand Up @@ -251,6 +256,8 @@ ln -s preconfigured.target %{buildroot}%{_cross_unitdir}/default.target
%{_cross_unitdir}/[email protected]
%dir %{_cross_unitdir}/systemd-resolved.service.d
%{_cross_unitdir}/systemd-resolved.service.d/00-env.conf
%dir %{_cross_unitdir}/systemd-networkd.service.d
%{_cross_unitdir}/systemd-networkd.service.d/00-env.conf
%dir %{_cross_unitdir}/systemd-tmpfiles-setup.service.d
%{_cross_unitdir}/systemd-tmpfiles-setup.service.d/00-debug.conf
%dir %{_cross_templatedir}
Expand Down
2 changes: 2 additions & 0 deletions packages/release/systemd-networkd-service-env.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[Service]
Environment=SYSTEMD_NETLINK_DEFAULT_TIMEOUT=infinity
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
From 926bf2c26d6e69e2e31a74c9ec50f882c3af8d79 Mon Sep 17 00:00:00 2001
From: Yu Watanabe <[email protected]>
Date: Sun, 1 Oct 2023 12:04:52 +0900
Subject: [PATCH] sd-netlink: make calc_elapse() return USEC_INFINITY when no
timeout is requested

Then, timeout_compare() becomes simpler, and the timeout value becomes
consistent with what sd_netlink_get_timeout() provides.

This also drops the unnecessary assignment of reply_callback.timeout after
the slot is dropped from the prioq.
---
src/libsystemd/sd-netlink/netlink-slot.c | 2 +-
src/libsystemd/sd-netlink/sd-netlink.c | 24 ++++++------------------
2 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/src/libsystemd/sd-netlink/netlink-slot.c b/src/libsystemd/sd-netlink/netlink-slot.c
index 34f527d07f..b2525ff9cf 100644
--- a/src/libsystemd/sd-netlink/netlink-slot.c
+++ b/src/libsystemd/sd-netlink/netlink-slot.c
@@ -63,7 +63,7 @@ void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref) {
case NETLINK_REPLY_CALLBACK:
(void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial);

- if (slot->reply_callback.timeout != 0)
+ if (slot->reply_callback.timeout != USEC_INFINITY)
prioq_remove(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);

break;
diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c
index 74f297243a..01a04eb308 100644
--- a/src/libsystemd/sd-netlink/sd-netlink.c
+++ b/src/libsystemd/sd-netlink/sd-netlink.c
@@ -254,7 +254,6 @@ static int process_timeout(sd_netlink *nl) {
return r;

assert_se(prioq_pop(nl->reply_callbacks_prioq) == c);
- c->timeout = 0;
hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(c->serial));

slot = container_of(c, sd_netlink_slot, reply_callback);
@@ -287,10 +286,8 @@ static int process_reply(sd_netlink *nl, sd_netlink_message *m) {
if (!c)
return 0;

- if (c->timeout != 0) {
+ if (c->timeout != USEC_INFINITY)
prioq_remove(nl->reply_callbacks_prioq, c, &c->prioq_idx);
- c->timeout = 0;
- }

r = sd_netlink_message_get_type(m, &type);
if (r < 0)
@@ -419,10 +416,7 @@ int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) {
return r;
}

-static usec_t calc_elapse(uint64_t usec) {
- if (usec == UINT64_MAX)
- return 0;
-
+static usec_t timespan_to_timestamp(usec_t usec) {
if (usec == 0)
usec = NETLINK_DEFAULT_TIMEOUT_USEC;

@@ -476,12 +470,6 @@ int sd_netlink_wait(sd_netlink *nl, uint64_t timeout_usec) {
static int timeout_compare(const void *a, const void *b) {
const struct reply_callback *x = a, *y = b;

- if (x->timeout != 0 && y->timeout == 0)
- return -1;
-
- if (x->timeout == 0 && y->timeout != 0)
- return 1;
-
return CMP(x->timeout, y->timeout);
}

@@ -521,7 +509,7 @@ int sd_netlink_call_async(
return r;

slot->reply_callback.callback = callback;
- slot->reply_callback.timeout = calc_elapse(usec);
+ slot->reply_callback.timeout = timespan_to_timestamp(usec);

k = sd_netlink_send(nl, m, &slot->reply_callback.serial);
if (k < 0)
@@ -531,7 +519,7 @@ int sd_netlink_call_async(
if (r < 0)
return r;

- if (slot->reply_callback.timeout != 0) {
+ if (slot->reply_callback.timeout != USEC_INFINITY) {
r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx);
if (r < 0) {
(void) hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial));
@@ -562,7 +550,7 @@ int sd_netlink_read(
assert_return(nl, -EINVAL);
assert_return(!netlink_pid_changed(nl), -ECHILD);

- timeout = calc_elapse(usec);
+ timeout = timespan_to_timestamp(usec);

for (;;) {
usec_t left;
@@ -609,7 +597,7 @@ int sd_netlink_read(
/* received message, so try to process straight away */
continue;

- if (timeout > 0) {
+ if (timeout != USEC_INFINITY) {
usec_t n;

n = now(CLOCK_MONOTONIC);
--
2.38.1

Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
From 5e518008b9015ced364e92648ad17f51e95442bc Mon Sep 17 00:00:00 2001
From: Yu Watanabe <[email protected]>
Date: Sun, 1 Oct 2023 12:04:59 +0900
Subject: [PATCH] sd-netlink: make the default timeout configurable by
environment variable

On normal systems, triggering a timeout should be a bug in code or
configuration error, so I do not think we should extend the default
timeout. Also, we should not introduce a 'first class' configuration
option for that. However, making it configurable may be useful for cases
such as "an extremely highly utilized system (lots of OOM kills,
very high CPU utilization, etc)".

Closes #25441.
---
docs/ENVIRONMENT.md | 3 +++
src/libsystemd/sd-netlink/sd-netlink.c | 23 +++++++++++++++++++++--
2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/docs/ENVIRONMENT.md b/docs/ENVIRONMENT.md
index 1e7a75a36c..81b3c36d58 100644
--- a/docs/ENVIRONMENT.md
+++ b/docs/ENVIRONMENT.md
@@ -108,6 +108,9 @@ All tools:
for example in `systemd-nspawn`, will be logged to the audit log, if the
kernel supports this.

+* `$SYSTEMD_NETLINK_DEFAULT_TIMEOUT` — specifies the default timeout for waiting
+ for replies to netlink messages from the kernel. Defaults to 25 seconds.
+
`systemctl`:

* `$SYSTEMCTL_FORCE_BUS=1` — if set, do not connect to PID 1's private D-Bus
diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c
index 01a04eb308..dd74a1aa78 100644
--- a/src/libsystemd/sd-netlink/sd-netlink.c
+++ b/src/libsystemd/sd-netlink/sd-netlink.c
@@ -417,8 +417,27 @@ int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) {
}

static usec_t timespan_to_timestamp(usec_t usec) {
- if (usec == 0)
- usec = NETLINK_DEFAULT_TIMEOUT_USEC;
+ static bool default_timeout_set = false;
+ static usec_t default_timeout;
+ int r;
+
+ if (usec == 0) {
+ if (!default_timeout_set) {
+ const char *e;
+
+ default_timeout_set = true;
+ default_timeout = NETLINK_DEFAULT_TIMEOUT_USEC;
+
+ e = getenv("SYSTEMD_NETLINK_DEFAULT_TIMEOUT");
+ if (e) {
+ r = parse_sec(e, &default_timeout);
+ if (r < 0)
+ log_debug_errno(r, "sd-netlink: Failed to parse $SYSTEMD_NETLINK_DEFAULT_TIMEOUT environment variable, ignoring: %m");
+ }
+ }
+
+ usec = default_timeout;
+ }

return usec_add(now(CLOCK_MONOTONIC), usec);
}
--
2.38.1

9 changes: 9 additions & 0 deletions packages/systemd/systemd.spec
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ Source3: journald.conf
Source4: issue
Source5: systemd-journald.conf

# Backport of upstream patches that make the netlink default timeout
# configurable. Bottlerocket carries these patches and configures the timeout
# to avoid a situation where a network link becomes unusable because the
# system is under load and doesn't process the RTM_NEWROUTE acknowledgement
# within the default timeout of 25 seconds.
# Reference issue: github.com/systemd/systemd/issues/25441
Patch1001: 1001-sd-netlink-make-calc_elapse-return-USEC_INFINITY-whe.patch
Patch1002: 1002-sd-netlink-make-the-default-timeout-configurable-by-.patch

# Local patch to work around the fact that /var is a bind mount from
# /local/var, and we want the /local/var/run symlink to point to /run.
Patch9001: 9001-use-absolute-path-for-var-run-symlink.patch
Expand Down

0 comments on commit c3e7a9e

Please sign in to comment.