From 0027d2ad335b02327ae58f5c33e5947ebc723bb5 Mon Sep 17 00:00:00 2001
From: Robert Evans
Date: Sat, 20 Jan 2024 18:05:52 -0500
Subject: [PATCH] Use DNODE_FIND_DIRTY for SEEK_HOLE/SEEK_DATA.

There is no need to worry about txg syncs anymore, so remove
dmu_offset_next_sync and dnode_is_dirty too.

This should be transparent to userland, except that holes created by
compression do not become visible until after the next txg sync.
mmap_seek_001_pos is updated to force a sync to match this case.

Signed-off-by: Robert Evans
---
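Reviewer note (editor's aid, not part of the commit message): assembled from
the hunks below, dmu_offset_next() reduces to a single dnode_next_offset()
call once the patch is applied. This sketch assumes the DNODE_FIND_DIRTY
flag makes dnode_next_offset() consult in-core dirty records itself, which
is what makes the old txg_wait_synced()/restart loop unnecessary:

	int
	dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
	    uint64_t *off)
	{
		dnode_t *dn;
		int err;

		err = dnode_hold(os, object, FTAG, &dn);
		if (err)
			return (err);

		/*
		 * DNODE_FIND_DIRTY accounts for uncommitted dirty
		 * records, so dirty dnodes no longer have to be synced
		 * out before holes can be reported, and the EBUSY/restart
		 * path disappears.
		 */
		err = dnode_next_offset(dn,
		    DNODE_FIND_DIRTY | (hole ? DNODE_FIND_HOLE : 0),
		    off, 1, 1, 0);

		dnode_rele(dn, FTAG);

		return (err);
	}

Note also that the caller no longer pre-acquires dn_struct_rwlock: without
DNODE_FIND_HAVELOCK, dnode_next_offset() takes the lock internally.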
 include/sys/dnode.h                                  |  1 -
 module/zfs/dmu.c                                     | 49 ++-----------------
 module/zfs/dnode.c                                   | 28 -----------
 tests/zfs-tests/cmd/Makefile.am                      |  3 ++
 tests/zfs-tests/cmd/mmap_seek.c                      | 27 +++++++++-
 tests/zfs-tests/include/tunables.cfg                 |  1 -
 .../functional/mmap/mmap_seek_001_pos.ksh            |  7 +--
 7 files changed, 33 insertions(+), 83 deletions(-)

diff --git a/include/sys/dnode.h b/include/sys/dnode.h
index 70c84241bcfe..b254e2dae4a8 100644
--- a/include/sys/dnode.h
+++ b/include/sys/dnode.h
@@ -438,7 +438,6 @@ boolean_t dnode_add_ref(dnode_t *dn, const void *ref);
 void dnode_rele(dnode_t *dn, const void *ref);
 void dnode_rele_and_unlock(dnode_t *dn, const void *tag, boolean_t evicting);
 int dnode_try_claim(objset_t *os, uint64_t object, int slots);
-boolean_t dnode_is_dirty(dnode_t *dn);
 void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
 void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, const void *tag);
 void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 32609399b79e..5766b6ec592e 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -75,15 +75,6 @@ static int zfs_nopwrite_enabled = 1;
  */
 static uint_t zfs_per_txg_dirty_frees_percent = 30;
 
-/*
- * Enable/disable forcing txg sync when dirty checking for holes with lseek().
- * By default this is enabled to ensure accurate hole reporting, it can result
- * in a significant performance penalty for lseek(SEEK_HOLE) heavy workloads.
- * Disabling this option will result in holes never being reported in dirty
- * files which is always safe.
- */
-static int zfs_dmu_offset_next_sync = 1;
-
 /*
  * Limit the amount we can prefetch with one call to this amount.  This
  * helps to limit the amount of memory that can be used by prefetching.
@@ -2496,45 +2487,16 @@ int
 dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
 {
 	dnode_t *dn;
-	int restarted = 0, err;
+	int err;
 
-restart:
 	err = dnode_hold(os, object, FTAG, &dn);
 	if (err)
 		return (err);
 
-	rw_enter(&dn->dn_struct_rwlock, RW_READER);
-
-	if (dnode_is_dirty(dn)) {
-		/*
-		 * If the zfs_dmu_offset_next_sync module option is enabled
-		 * then hole reporting has been requested.  Dirty dnodes
-		 * must be synced to disk to accurately report holes.
-		 *
-		 * Provided a RL_READER rangelock spanning 0-UINT64_MAX is
-		 * held by the caller only a single restart will be required.
-		 * We tolerate callers which do not hold the rangelock by
-		 * returning EBUSY and not reporting holes after one restart.
-		 */
-		if (zfs_dmu_offset_next_sync) {
-			rw_exit(&dn->dn_struct_rwlock);
-			dnode_rele(dn, FTAG);
+	err = dnode_next_offset(dn,
+	    DNODE_FIND_DIRTY | (hole ? DNODE_FIND_HOLE : 0),
+	    off, 1, 1, 0);
 
-			if (restarted)
-				return (SET_ERROR(EBUSY));
-
-			txg_wait_synced(dmu_objset_pool(os), 0);
-			restarted = 1;
-			goto restart;
-		}
-
-		err = SET_ERROR(EBUSY);
-	} else {
-		err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK |
-		    (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
-	}
-
-	rw_exit(&dn->dn_struct_rwlock);
 	dnode_rele(dn, FTAG);
 
 	return (err);
@@ -2939,9 +2901,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, nopwrite_enabled, INT, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, UINT, ZMOD_RW,
 	"Percentage of dirtied blocks from frees in one TXG");
 
-ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW,
-	"Enable forcing txg sync to find holes");
-
 ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, UINT, ZMOD_RW,
 	"Limit one prefetch call to this size");
 
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index f85fb73bb8eb..5fed3c3f1408 100644
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -1815,34 +1815,6 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
 	    slots, NULL, NULL));
 }
 
-/*
- * Checks if the dnode itself is dirty, or is carrying any uncommitted records.
- * It is important to check both conditions, as some operations (eg appending
- * to a file) can dirty both as a single logical unit, but they are not synced
- * out atomically, so checking one and not the other can result in an object
- * appearing to be clean mid-way through a commit.
- *
- * Do not change this lightly! If you get it wrong, dmu_offset_next() can
- * detect a hole where there is really data, leading to silent corruption.
- */
-boolean_t
-dnode_is_dirty(dnode_t *dn)
-{
-	mutex_enter(&dn->dn_mtx);
-
-	for (int i = 0; i < TXG_SIZE; i++) {
-		if (multilist_link_active(&dn->dn_dirty_link[i]) ||
-		    !list_is_empty(&dn->dn_dirty_records[i])) {
-			mutex_exit(&dn->dn_mtx);
-			return (B_TRUE);
-		}
-	}
-
-	mutex_exit(&dn->dn_mtx);
-
-	return (B_FALSE);
-}
-
 void
 dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
 {
diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am
index 5250e72f9fa8..cf5612d97a78 100644
--- a/tests/zfs-tests/cmd/Makefile.am
+++ b/tests/zfs-tests/cmd/Makefile.am
@@ -68,6 +68,9 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/mkbusy %D%/mkfile %D%/mkfiles %D%/mktree
 scripts_zfs_tests_bin_PROGRAMS += %D%/mmap_exec %D%/mmap_seek %D%/mmap_sync %D%/mmapwrite %D%/readmmap
+%C%_mmap_seek_LDADD = \
+	libzfs_core.la \
+	libnvpair.la
 %C%_mmapwrite_LDADD = -lpthread
 
 if WANT_MMAP_LIBAIO
diff --git a/tests/zfs-tests/cmd/mmap_seek.c b/tests/zfs-tests/cmd/mmap_seek.c
index 2d250554a13f..d5e122b156a5 100644
--- a/tests/zfs-tests/cmd/mmap_seek.c
+++ b/tests/zfs-tests/cmd/mmap_seek.c
@@ -34,6 +34,8 @@
 #ifdef __linux__
 #include <linux/fs.h>
 #endif
+#include <libzfs_core.h>
+#include <libnvpair.h>
 
 /* some older uClibc's lack the defines, so we'll manually define them */
 #ifdef __UCLIBC__
@@ -67,6 +69,18 @@ seek_hole(int fd, off_t offset, off_t expected)
 	}
 }
 
+static void
+sync_pool(const char *pool_name)
+{
+	nvlist_t *innvl = fnvlist_alloc();
+	fnvlist_add_boolean_value(innvl, "force", B_TRUE);
+	if (lzc_sync(pool_name, innvl, NULL) != 0) {
+		perror("lzc_sync");
+		exit(2);
+	}
+	nvlist_free(innvl);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -75,12 +89,14 @@ main(int argc, char **argv)
 	char *buf = NULL;
 	int err;
 
-	if (argc != 4) {
+	if (argc != 5) {
 		(void) printf("usage: %s <file name> <file size> "
-		    "<block size>\n", argv[0]);
+		    "<block size> <pool name>\n", argv[0]);
 		exit(1);
 	}
 
+	(void) libzfs_core_init();
+
 	int fd = open(file_path, O_RDWR | O_CREAT, 0666);
 	if (fd == -1) {
 		(void) fprintf(stderr, "%s: %s: ", execname, file_path);
@@ -90,6 +106,7 @@ main(int argc, char **argv)
 
 	off_t file_size = atoi(argv[2]);
 	off_t block_size = atoi(argv[3]);
+	const char *pool_name = argv[4];
 
 	if (block_size * 2 > file_size) {
 		(void) fprintf(stderr, "file size must be at least "
@@ -143,6 +160,12 @@ main(int argc, char **argv)
 
 	/* Punch a hole (required compression be enabled). */
 	memset(buf + block_size, 0, block_size);
+	err = msync(buf, file_size, MS_SYNC);
+	if (err == -1) {
+		perror("msync");
+		exit(2);
+	}
+	sync_pool(pool_name);
 	seek_data(fd, 0, 0);
 	seek_data(fd, block_size, 2 * block_size);
 	seek_hole(fd, 0, block_size);
diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
index 2024c44cc138..015b45b3108d 100644
--- a/tests/zfs-tests/include/tunables.cfg
+++ b/tests/zfs-tests/include/tunables.cfg
@@ -38,7 +38,6 @@ DEADMAN_FAILMODE	deadman.failmode	zfs_deadman_failmode
 DEADMAN_SYNCTIME_MS	deadman.synctime_ms	zfs_deadman_synctime_ms
 DEADMAN_ZIOTIME_MS	deadman.ziotime_ms	zfs_deadman_ziotime_ms
 DISABLE_IVSET_GUID_CHECK	disable_ivset_guid_check	zfs_disable_ivset_guid_check
-DMU_OFFSET_NEXT_SYNC	dmu_offset_next_sync	zfs_dmu_offset_next_sync
 EMBEDDED_SLOG_MIN_MS	embedded_slog_min_ms	zfs_embedded_slog_min_ms
 INITIALIZE_CHUNK_SIZE	initialize_chunk_size	zfs_initialize_chunk_size
 INITIALIZE_VALUE	initialize_value	zfs_initialize_value
diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh
index c09746b4b66a..6e465bd21c0a 100755
--- a/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/mmap/mmap_seek_001_pos.ksh
@@ -43,24 +43,19 @@ function cleanup
 	log_must zfs set compression=off $TESTPOOL/$TESTFS
 	log_must zfs set recordsize=128k $TESTPOOL/$TESTFS
 	log_must rm -f $TESTDIR/test-mmap-file
-	log_must set_tunable64 DMU_OFFSET_NEXT_SYNC $dmu_offset_next_sync
 }
 
 log_assert "lseek() data/holes for an mmap()'d file."
 
 log_onexit cleanup
 
-# Enable hole reporting for dirty files.
-typeset dmu_offset_next_sync=$(get_tunable DMU_OFFSET_NEXT_SYNC)
-log_must set_tunable64 DMU_OFFSET_NEXT_SYNC 1
-
 # Compression must be enabled to convert zero'd blocks to holes.
 # This behavior is checked by the mmap_seek test.
 log_must zfs set compression=on $TESTPOOL/$TESTFS
 
 for bs in 4096 8192 16384 32768 65536 131072; do
 	log_must zfs set recordsize=$bs $TESTPOOL/$TESTFS
-	log_must mmap_seek $TESTDIR/test-mmap-file $((1024*1024)) $bs
+	log_must mmap_seek $TESTDIR/test-mmap-file $((1024*1024)) $bs $TESTPOOL
 	log_must rm $TESTDIR/test-mmap-file
 done
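
Illustration (editor's sketch, not part of the patch): the userland-visible
change is that a hole produced by overwriting a compressed block with zeros
is not reported by lseek(SEEK_HOLE) until the txg syncs. The file path,
dataset settings, and sizes below are assumptions for the example; the
dataset must have compression enabled:

	/* cc seek_demo.c -o seek_demo */
	#define _GNU_SOURCE	/* SEEK_HOLE/SEEK_DATA on Linux */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	int
	main(void)
	{
		size_t bs = 131072;	/* assumed recordsize */
		char *buf = malloc(bs);
		/* assumed path on a compression=on ZFS dataset */
		int fd = open("/tank/fs/seek-demo",
		    O_RDWR | O_CREAT | O_TRUNC, 0644);

		if (fd == -1 || buf == NULL)
			return (1);

		memset(buf, 0xab, bs);
		(void) write(fd, buf, bs);	/* block 0: data */
		memset(buf, 0, bs);
		(void) write(fd, buf, bs);	/* block 1: all zeros */

		/*
		 * While the file is dirty this prints 262144 (the
		 * implicit hole at EOF): the zero'd block still counts
		 * as data.  After the txg syncs ("zpool sync", or the
		 * lzc_sync() call mmap_seek now makes) the same lseek()
		 * reports the compression-created hole at 131072.
		 */
		(void) printf("first hole at %lld\n",
		    (long long)lseek(fd, 0, SEEK_HOLE));

		free(buf);
		(void) close(fd);
		return (0);
	}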