From 2cbd63df9894ee3ddb83702dd372578bb18cf4b9 Mon Sep 17 00:00:00 2001 From: Zac Mrowicki Date: Wed, 11 Oct 2023 18:57:26 +0000 Subject: [PATCH 1/2] systemd: Backport patches that allow netlink timeout to be configured Backport 2 patches that allow the configurability of the default timeout for waiting for replies for netlink messages from the kernel. The default timeout is 25 seconds and it is possible to hit this timeout if the system is under load. If the timeout is hit, network links can become unusable without intervention. Reference issue: github.com/systemd/systemd/issues/25441 --- ...calc_elapse-return-USEC_INFINITY-whe.patch | 117 ++++++++++++++++++ ...the-default-timeout-configurable-by-.patch | 70 +++++++++++ packages/systemd/systemd.spec | 9 ++ 3 files changed, 196 insertions(+) create mode 100644 packages/systemd/1001-sd-netlink-make-calc_elapse-return-USEC_INFINITY-whe.patch create mode 100644 packages/systemd/1002-sd-netlink-make-the-default-timeout-configurable-by-.patch diff --git a/packages/systemd/1001-sd-netlink-make-calc_elapse-return-USEC_INFINITY-whe.patch b/packages/systemd/1001-sd-netlink-make-calc_elapse-return-USEC_INFINITY-whe.patch new file mode 100644 index 00000000000..146d416ecfa --- /dev/null +++ b/packages/systemd/1001-sd-netlink-make-calc_elapse-return-USEC_INFINITY-whe.patch @@ -0,0 +1,117 @@ +From 926bf2c26d6e69e2e31a74c9ec50f882c3af8d79 Mon Sep 17 00:00:00 2001 +From: Yu Watanabe +Date: Sun, 1 Oct 2023 12:04:52 +0900 +Subject: [PATCH] sd-netlink: make calc_elapse() return USEC_INFINITY when no + timeout is requested + +Then, timeout_compare() becomes simpler, the timeout value becomes +consistent with what sd_netlink_get_timeout() provides. + +This also drops unnecessary assignment of reply_callback.timeout after +the slot is dropped from the prioq. 
+--- + src/libsystemd/sd-netlink/netlink-slot.c | 2 +- + src/libsystemd/sd-netlink/sd-netlink.c | 24 ++++++------------------ + 2 files changed, 7 insertions(+), 19 deletions(-) + +diff --git a/src/libsystemd/sd-netlink/netlink-slot.c b/src/libsystemd/sd-netlink/netlink-slot.c +index 34f527d07f..b2525ff9cf 100644 +--- a/src/libsystemd/sd-netlink/netlink-slot.c ++++ b/src/libsystemd/sd-netlink/netlink-slot.c +@@ -63,7 +63,7 @@ void netlink_slot_disconnect(sd_netlink_slot *slot, bool unref) { + case NETLINK_REPLY_CALLBACK: + (void) hashmap_remove(nl->reply_callbacks, &slot->reply_callback.serial); + +- if (slot->reply_callback.timeout != 0) ++ if (slot->reply_callback.timeout != USEC_INFINITY) + prioq_remove(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx); + + break; +diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c +index 74f297243a..01a04eb308 100644 +--- a/src/libsystemd/sd-netlink/sd-netlink.c ++++ b/src/libsystemd/sd-netlink/sd-netlink.c +@@ -254,7 +254,6 @@ static int process_timeout(sd_netlink *nl) { + return r; + + assert_se(prioq_pop(nl->reply_callbacks_prioq) == c); +- c->timeout = 0; + hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(c->serial)); + + slot = container_of(c, sd_netlink_slot, reply_callback); +@@ -287,10 +286,8 @@ static int process_reply(sd_netlink *nl, sd_netlink_message *m) { + if (!c) + return 0; + +- if (c->timeout != 0) { ++ if (c->timeout != USEC_INFINITY) + prioq_remove(nl->reply_callbacks_prioq, c, &c->prioq_idx); +- c->timeout = 0; +- } + + r = sd_netlink_message_get_type(m, &type); + if (r < 0) +@@ -419,10 +416,7 @@ int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) { + return r; + } + +-static usec_t calc_elapse(uint64_t usec) { +- if (usec == UINT64_MAX) +- return 0; +- ++static usec_t timespan_to_timestamp(usec_t usec) { + if (usec == 0) + usec = NETLINK_DEFAULT_TIMEOUT_USEC; + +@@ -476,12 +470,6 @@ int sd_netlink_wait(sd_netlink *nl, 
uint64_t timeout_usec) { + static int timeout_compare(const void *a, const void *b) { + const struct reply_callback *x = a, *y = b; + +- if (x->timeout != 0 && y->timeout == 0) +- return -1; +- +- if (x->timeout == 0 && y->timeout != 0) +- return 1; +- + return CMP(x->timeout, y->timeout); + } + +@@ -521,7 +509,7 @@ int sd_netlink_call_async( + return r; + + slot->reply_callback.callback = callback; +- slot->reply_callback.timeout = calc_elapse(usec); ++ slot->reply_callback.timeout = timespan_to_timestamp(usec); + + k = sd_netlink_send(nl, m, &slot->reply_callback.serial); + if (k < 0) +@@ -531,7 +519,7 @@ int sd_netlink_call_async( + if (r < 0) + return r; + +- if (slot->reply_callback.timeout != 0) { ++ if (slot->reply_callback.timeout != USEC_INFINITY) { + r = prioq_put(nl->reply_callbacks_prioq, &slot->reply_callback, &slot->reply_callback.prioq_idx); + if (r < 0) { + (void) hashmap_remove(nl->reply_callbacks, UINT32_TO_PTR(slot->reply_callback.serial)); +@@ -562,7 +550,7 @@ int sd_netlink_read( + assert_return(nl, -EINVAL); + assert_return(!netlink_pid_changed(nl), -ECHILD); + +- timeout = calc_elapse(usec); ++ timeout = timespan_to_timestamp(usec); + + for (;;) { + usec_t left; +@@ -609,7 +597,7 @@ int sd_netlink_read( + /* received message, so try to process straight away */ + continue; + +- if (timeout > 0) { ++ if (timeout != USEC_INFINITY) { + usec_t n; + + n = now(CLOCK_MONOTONIC); +-- +2.38.1 + diff --git a/packages/systemd/1002-sd-netlink-make-the-default-timeout-configurable-by-.patch b/packages/systemd/1002-sd-netlink-make-the-default-timeout-configurable-by-.patch new file mode 100644 index 00000000000..08d7a58191a --- /dev/null +++ b/packages/systemd/1002-sd-netlink-make-the-default-timeout-configurable-by-.patch @@ -0,0 +1,70 @@ +From 5e518008b9015ced364e92648ad17f51e95442bc Mon Sep 17 00:00:00 2001 +From: Yu Watanabe +Date: Sun, 1 Oct 2023 12:04:59 +0900 +Subject: [PATCH] sd-netlink: make the default timeout configurable by + environment 
variable + +On normal systems, triggering a timeout should be a bug in code or +configuration error, so I do not think we should extend the default +timeout. Also, we should not introduce a 'first class' configuration +option about that. But, making it configurable may be useful for cases +such that "an extremely highly utilized system (lots of OOM kills, +very high CPU utilization, etc)". + +Closes #25441. +--- + docs/ENVIRONMENT.md | 3 +++ + src/libsystemd/sd-netlink/sd-netlink.c | 23 +++++++++++++++++++++-- + 2 files changed, 24 insertions(+), 2 deletions(-) + +diff --git a/docs/ENVIRONMENT.md b/docs/ENVIRONMENT.md +index 1e7a75a36c..81b3c36d58 100644 +--- a/docs/ENVIRONMENT.md ++++ b/docs/ENVIRONMENT.md +@@ -108,6 +108,9 @@ All tools: + for example in `systemd-nspawn`, will be logged to the audit log, if the + kernel supports this. + ++* `$SYSTEMD_NETLINK_DEFAULT_TIMEOUT` — specifies the default timeout of waiting ++ replies for netlink messages from the kernel. Defaults to 25 seconds. 
++ + `systemctl`: + + * `$SYSTEMCTL_FORCE_BUS=1` — if set, do not connect to PID 1's private D-Bus +diff --git a/src/libsystemd/sd-netlink/sd-netlink.c b/src/libsystemd/sd-netlink/sd-netlink.c +index 01a04eb308..dd74a1aa78 100644 +--- a/src/libsystemd/sd-netlink/sd-netlink.c ++++ b/src/libsystemd/sd-netlink/sd-netlink.c +@@ -417,8 +417,27 @@ int sd_netlink_process(sd_netlink *nl, sd_netlink_message **ret) { + } + + static usec_t timespan_to_timestamp(usec_t usec) { +- if (usec == 0) +- usec = NETLINK_DEFAULT_TIMEOUT_USEC; ++ static bool default_timeout_set = false; ++ static usec_t default_timeout; ++ int r; ++ ++ if (usec == 0) { ++ if (!default_timeout_set) { ++ const char *e; ++ ++ default_timeout_set = true; ++ default_timeout = NETLINK_DEFAULT_TIMEOUT_USEC; ++ ++ e = getenv("SYSTEMD_NETLINK_DEFAULT_TIMEOUT"); ++ if (e) { ++ r = parse_sec(e, &default_timeout); ++ if (r < 0) ++ log_debug_errno(r, "sd-netlink: Failed to parse $SYSTEMD_NETLINK_DEFAULT_TIMEOUT environment variable, ignoring: %m"); ++ } ++ } ++ ++ usec = default_timeout; ++ } + + return usec_add(now(CLOCK_MONOTONIC), usec); + } +-- +2.38.1 + diff --git a/packages/systemd/systemd.spec b/packages/systemd/systemd.spec index b3777d284a0..e743f07ceab 100644 --- a/packages/systemd/systemd.spec +++ b/packages/systemd/systemd.spec @@ -14,6 +14,15 @@ Source3: journald.conf Source4: issue Source5: systemd-journald.conf +# Backport of upstream patches that make the netlink default timeout +# configurable. Bottlerocket carries these patches and configures the timeout in +# an effort to avoid a situation where a network link becomes unusable if the +# system is under load and doesn't process the RTM_NEWROUTE acknowledgement +# within the default timeout of 25 seconds. 
+# Reference issue: github.com/systemd/systemd/issues/25441 +Patch1001: 1001-sd-netlink-make-calc_elapse-return-USEC_INFINITY-whe.patch +Patch1002: 1002-sd-netlink-make-the-default-timeout-configurable-by-.patch + # Local patch to work around the fact that /var is a bind mount from # /local/var, and we want the /local/var/run symlink to point to /run. Patch9001: 9001-use-absolute-path-for-var-run-symlink.patch From 0ac35eddf7e477e4a7ebaefb5baa46c1eceddc55 Mon Sep 17 00:00:00 2001 From: Zac Mrowicki Date: Wed, 11 Oct 2023 19:48:26 +0000 Subject: [PATCH 2/2] release: Configure default SYSTEMD_NETLINK_DEFAULT_TIMEOUT Write a drop-in for the systemd-networkd service that sets the SYSTEMD_NETLINK_DEFAULT_TIMEOUT environment variable to "infinity", rather than use the default timeout of 25 seconds. This ensures that systemd-networkd won't put links into an inoperable state if the kernel/system is heavily loaded and doesn't respond right away. --- packages/release/release.spec | 7 +++++++ packages/release/systemd-networkd-service-env.conf | 2 ++ 2 files changed, 9 insertions(+) create mode 100644 packages/release/systemd-networkd-service-env.conf diff --git a/packages/release/release.spec b/packages/release/release.spec index 4e1533db90e..e0df5a23d64 100644 --- a/packages/release/release.spec +++ b/packages/release/release.spec @@ -75,6 +75,7 @@ Source1080: runtime.slice # Drop-in units to override defaults Source1100: systemd-tmpfiles-setup-service-debug.conf Source1101: systemd-resolved-service-env.conf +Source1102: systemd-networkd-service-env.conf # systemd-udevd default link Source1200: 80-release.link @@ -167,6 +168,10 @@ install -d %{buildroot}%{_cross_unitdir}/systemd-resolved.service.d install -p -m 0644 %{S:1101} \ %{buildroot}%{_cross_unitdir}/systemd-resolved.service.d/00-env.conf +install -d %{buildroot}%{_cross_unitdir}/systemd-networkd.service.d +install -p -m 0644 %{S:1102} \ + %{buildroot}%{_cross_unitdir}/systemd-networkd.service.d/00-env.conf + 
LOWERPATH=$(systemd-escape --path %{_cross_sharedstatedir}/kernel-devel/.overlay/lower) sed -e 's|PREFIX|%{_cross_prefix}|' %{S:1020} > ${LOWERPATH}.mount install -p -m 0644 ${LOWERPATH}.mount %{buildroot}%{_cross_unitdir} @@ -251,6 +256,8 @@ ln -s preconfigured.target %{buildroot}%{_cross_unitdir}/default.target %{_cross_unitdir}/deprecation-warning@.timer %dir %{_cross_unitdir}/systemd-resolved.service.d %{_cross_unitdir}/systemd-resolved.service.d/00-env.conf +%dir %{_cross_unitdir}/systemd-networkd.service.d +%{_cross_unitdir}/systemd-networkd.service.d/00-env.conf %dir %{_cross_unitdir}/systemd-tmpfiles-setup.service.d %{_cross_unitdir}/systemd-tmpfiles-setup.service.d/00-debug.conf %dir %{_cross_templatedir} diff --git a/packages/release/systemd-networkd-service-env.conf b/packages/release/systemd-networkd-service-env.conf new file mode 100644 index 00000000000..e81831b77f6 --- /dev/null +++ b/packages/release/systemd-networkd-service-env.conf @@ -0,0 +1,2 @@ +[Service] +Environment=SYSTEMD_NETLINK_DEFAULT_TIMEOUT=infinity