From fcf571f73c568682e9efb536b54f4a61a60ed5bb Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 19 Sep 2024 18:18:47 -0400 Subject: [PATCH] Reduce and handle EAGAIN errors on AIO label reads At least FreeBSD has a limit of 256 simultaneous AIO requests per process. Attempting to issue more results in EAGAIN errors. Since we issue 4 requests per disk/partition from 2xCPUs threads, it is quite easy to reach that limit on large systems, which results in random pool import failures. It annoyed me for quite a while on a system with 64 CPUs and 70+ partitioned disks. On one hand, this patch limits the number of threads to avoid the error; on the other, it should softly fall back to sync reads in case of error. It takes into account _SC_AIO_MAX as a system-wide AIO limit and _SC_AIO_LISTIO_MAX as the closest value to a per-process limit. The latter is not exactly right, but it is the best I found. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. --- lib/libzutil/zutil_import.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/lib/libzutil/zutil_import.c b/lib/libzutil/zutil_import.c index 06705ff4d9b4..318d5bccbdef 100644 --- a/lib/libzutil/zutil_import.c +++ b/lib/libzutil/zutil_import.c @@ -1071,6 +1071,7 @@ zpool_read_label(int fd, nvlist_t **config, int *num_labels) * Try the slow method. */ zfs_fallthrough; + case EAGAIN: case EOPNOTSUPP: case ENOSYS: do_slow = B_TRUE; @@ -1464,7 +1465,21 @@ zpool_find_import_impl(libpc_handle_t *hdl, importargs_t *iarg, * validating labels, a large number of threads can be used due to * minimal contention. 
*/ - t = tpool_create(1, 2 * sysconf(_SC_NPROCESSORS_ONLN), 0, NULL); + long threads = 2 * sysconf(_SC_NPROCESSORS_ONLN); +#ifdef HAVE_AIO_H + long am; +#ifdef _SC_AIO_LISTIO_MAX + am = sysconf(_SC_AIO_LISTIO_MAX); + if (am >= 4) + threads = MIN(threads, am / 4); +#endif +#ifdef _SC_AIO_MAX + am = sysconf(_SC_AIO_MAX); + if (am >= 4) + threads = MIN(threads, am / 4); +#endif +#endif + t = tpool_create(1, threads, 0, NULL); for (slice = avl_first(cache); slice; (slice = avl_walk(cache, slice, AVL_AFTER))) (void) tpool_dispatch(t, zpool_open_func, slice);