From cd1cfae6cd1b67e71e30275afcd70815fa5a0f26 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Mon, 16 Oct 2023 14:19:13 +1100 Subject: [PATCH] spa: document spa_thread() and SDC feature gates spa_thread() and the "System Duty Cycle" scheduling class are from Illumos and have not yet been adapted to Linux or FreeBSD. HAVE_SPA_THREAD has long been explicitly undefined and used to mark spa_thread(), but there's some related taskq code that can never be invoked without it, which makes some already-tricky code harder to read. HAVE_SYSDC is introduced in this commit to mark the SDC parts. SDC requires spa_thread(), but the inverse is not true, so they are separate. I don't want to make the call to just remove it because I still harbour hopes that OpenZFS could become a first-class citizen on Illumos someday. But hopefully this will at least make the reason it exists a bit clearer for people without long memories and/or an interest in history. For those that are interested in the history, the original FreeBSD port of ZFS (before ZFS-on-Linux was adopted there) did have a spa_thread(), but not SDC. The last version of that before it was removed can be read here: https://github.com/freebsd/freebsd-src/blob/22df1ffd812f0395cdb7c0b1edae1f67b991562a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Meanwhile, more information on the SDC scheduling class is here: https://github.com/illumos/illumos-gate/blob/master/usr/src/uts/common/disp/sysdc.c Signed-off-by: Rob Norris Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. --- module/zfs/spa.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 0b69568e2aad..aa97144f16e4 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -100,6 +100,26 @@ #include "zfs_prop.h" #include "zfs_comutil.h" +/* + * spa_thread() existed on Illumos as a parent thread for the various worker + * threads that actually run the pool, as a way to both reference the entire + * pool work as a single object, and to share properties like scheduling + * options. It has not yet been adapted to Linux or FreeBSD. This define is + * used to mark related parts of the code to make things easier for the reader, + * and to compile this code out. It can be removed when someone implements it, + * moves it to some Illumos-specific place, or removes it entirely. + */ +#undef HAVE_SPA_THREAD + +/* + * The "System Duty Cycle" scheduling class is an Illumos feature to help + * prevent CPU-intensive kernel threads from affecting latency on interactive + * threads. It doesn't exist on Linux or FreeBSD, so the supporting code is + * gated behind a define. On Illumos SDC depends on spa_thread(), but + * spa_thread() also has other uses, so this is a separate define. + */ +#undef HAVE_SYSDC + /* * The interval, in seconds, at which failed configuration cache file writes * should be retried. @@ -176,10 +196,15 @@ static uint_t metaslab_preload_pct = 50; static uint_t zio_taskq_batch_pct = 80; /* 1 thread per cpu in pset */ static uint_t zio_taskq_batch_tpq; /* threads per taskq */ + +#ifdef HAVE_SYSDC static const boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ static const uint_t zio_taskq_basedc = 80; /* base duty cycle */ +#endif +#ifdef HAVE_SPA_THREAD static const boolean_t spa_create_process = B_TRUE; /* no process => no sysdc */ +#endif /* * Report any spa_load_verify errors found, but do not fail spa_load. @@ -1029,7 +1054,9 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) uint_t count = ztip->zti_count; spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q]; uint_t cpus, flags = TASKQ_DYNAMIC; +#ifdef HAVE_SYSDC boolean_t batch = B_FALSE; +#endif switch (mode) { case ZTI_MODE_FIXED: @@ -1037,7 +1064,9 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) break; case ZTI_MODE_BATCH: +#ifdef HAVE_SYSDC batch = B_TRUE; +#endif flags |= TASKQ_THREADS_CPU_PCT; value = MIN(zio_taskq_batch_pct, 100); break; @@ -1106,6 +1135,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) (void) snprintf(name, sizeof (name), "%s_%s", zio_type_name[t], zio_taskq_types[q]); +#ifdef HAVE_SYSDC if (zio_taskq_sysdc && spa->spa_proc != &p0) { if (batch) flags |= TASKQ_DC_BATCH; @@ -1114,6 +1144,7 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) tq = taskq_create_sysdc(name, value, 50, INT_MAX, spa->spa_proc, zio_taskq_basedc, flags); } else { +#endif pri_t pri = maxclsyspri; /* * The write issue taskq can be extremely CPU @@ -1139,7 +1170,9 @@ spa_taskqs_init(spa_t *spa, zio_type_t t, zio_taskq_type_t q) } tq = taskq_create_proc(name, value, pri, 50, INT_MAX, spa->spa_proc, flags); +#ifdef HAVE_SYSDC } +#endif tqs->stqs_taskq[i] = tq; } @@ -1224,11 +1257,6 @@ spa_create_zio_taskqs(spa_t *spa) } } -/* - * Disabled until spa_thread() can be adapted for Linux. - */ -#undef HAVE_SPA_THREAD - #if defined(_KERNEL) && defined(HAVE_SPA_THREAD) static void spa_thread(void *arg) @@ -1269,9 +1297,11 @@ spa_thread(void *arg) pool_unlock(); } +#ifdef HAVE_SYSDC if (zio_taskq_sysdc) { sysdc_thread_enter(curthread, 100, 0); } +#endif spa->spa_proc = curproc; spa->spa_did = curthread->t_did; @@ -1327,7 +1357,6 @@ spa_activate(spa_t *spa, spa_mode_t mode) ASSERT(spa->spa_proc == &p0); spa->spa_did = 0; - (void) spa_create_process; #ifdef HAVE_SPA_THREAD /* Only create a process if we're going to be around a while. */ if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {