diff --git a/META b/META
index 185cca4a44d4..dc19ac37b355 100644
--- a/META
+++ b/META
@@ -6,5 +6,5 @@ Release: 1
 Release-Tags: relext
 License: CDDL
 Author: OpenZFS
-Linux-Maximum: 6.11
+Linux-Maximum: 6.12
 Linux-Minimum: 4.18
diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
index 0179a2714cab..aba99fabbbb9 100644
--- a/cmd/zdb/zdb.c
+++ b/cmd/zdb/zdb.c
@@ -1967,17 +1967,53 @@ dump_dedup_ratio(const ddt_stat_t *dds)
 static void
 dump_ddt_log(ddt_t *ddt)
 {
+	if (ddt->ddt_version != DDT_VERSION_FDT ||
+	    !(ddt->ddt_flags & DDT_FLAG_LOG))
+		return;
+
 	for (int n = 0; n < 2; n++) {
 		ddt_log_t *ddl = &ddt->ddt_log[n];
-		uint64_t count = avl_numnodes(&ddl->ddl_tree);
-		if (count == 0)
-			continue;
+		char flagstr[64] = {0};
+		if (ddl->ddl_flags > 0) {
+			flagstr[0] = ' ';
+			int c = 1;
+			if (ddl->ddl_flags & DDL_FLAG_FLUSHING)
+				c += strlcpy(&flagstr[c], " FLUSHING",
+				    sizeof (flagstr) - c);
+			if (ddl->ddl_flags & DDL_FLAG_CHECKPOINT)
+				c += strlcpy(&flagstr[c], " CHECKPOINT",
+				    sizeof (flagstr) - c);
+			if (ddl->ddl_flags &
+			    ~(DDL_FLAG_FLUSHING|DDL_FLAG_CHECKPOINT))
+				c += strlcpy(&flagstr[c], " UNKNOWN",
+				    sizeof (flagstr) - c);
+			flagstr[1] = '[';
+			flagstr[c++] = ']';
+		}

-		printf(DMU_POOL_DDT_LOG ": %lu log entries\n",
-		    zio_checksum_table[ddt->ddt_checksum].ci_name, n, count);
+		uint64_t count = avl_numnodes(&ddl->ddl_tree);

-		if (dump_opt['D'] < 4)
+		printf(DMU_POOL_DDT_LOG ": flags=0x%02x%s; obj=%llu; "
+		    "len=%llu; txg=%llu; entries=%llu\n",
+		    zio_checksum_table[ddt->ddt_checksum].ci_name, n,
+		    ddl->ddl_flags, flagstr,
+		    (u_longlong_t)ddl->ddl_object,
+		    (u_longlong_t)ddl->ddl_length,
+		    (u_longlong_t)ddl->ddl_first_txg, (u_longlong_t)count);
+
+		if (ddl->ddl_flags & DDL_FLAG_CHECKPOINT) {
+			const ddt_key_t *ddk = &ddl->ddl_checkpoint;
+			printf(" checkpoint: "
+			    "%016llx:%016llx:%016llx:%016llx:%016llx\n",
+			    (u_longlong_t)ddk->ddk_cksum.zc_word[0],
+			    (u_longlong_t)ddk->ddk_cksum.zc_word[1],
+			    (u_longlong_t)ddk->ddk_cksum.zc_word[2],
+			    (u_longlong_t)ddk->ddk_cksum.zc_word[3],
+			    (u_longlong_t)ddk->ddk_prop);
+		}
+
+		if (count == 0 || dump_opt['D'] < 4)
 			continue;

 		ddt_lightweight_entry_t ddlwe;
@@ -1991,7 +2027,7 @@ dump_ddt_log(ddt_t *ddt)
 }

 static void
-dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
+dump_ddt_object(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
 {
 	char name[DDT_NAMELEN];
 	ddt_lightweight_entry_t ddlwe;
@@ -2016,11 +2052,8 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)

 	ddt_object_name(ddt, type, class, name);

-	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
-	    name,
-	    (u_longlong_t)count,
-	    (u_longlong_t)dspace,
-	    (u_longlong_t)mspace);
+	(void) printf("%s: dspace=%llu; mspace=%llu; entries=%llu\n", name,
+	    (u_longlong_t)dspace, (u_longlong_t)mspace, (u_longlong_t)count);

 	if (dump_opt['D'] < 3)
 		return;
@@ -2043,24 +2076,52 @@ dump_ddt(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
 	(void) printf("\n");
 }

+static void
+dump_ddt(ddt_t *ddt)
+{
+	if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
+		return;
+
+	char flagstr[64] = {0};
+	if (ddt->ddt_flags > 0) {
+		flagstr[0] = ' ';
+		int c = 1;
+		if (ddt->ddt_flags & DDT_FLAG_FLAT)
+			c += strlcpy(&flagstr[c], " FLAT",
+			    sizeof (flagstr) - c);
+		if (ddt->ddt_flags & DDT_FLAG_LOG)
+			c += strlcpy(&flagstr[c], " LOG",
+			    sizeof (flagstr) - c);
+		if (ddt->ddt_flags & ~DDT_FLAG_MASK)
+			c += strlcpy(&flagstr[c], " UNKNOWN",
+			    sizeof (flagstr) - c);
+		flagstr[1] = '[';
+		flagstr[c] = ']';
+	}
+
+	printf("DDT-%s: version=%llu [%s]; flags=0x%02llx%s; rootobj=%llu\n",
+	    zio_checksum_table[ddt->ddt_checksum].ci_name,
+	    (u_longlong_t)ddt->ddt_version,
+	    (ddt->ddt_version == 0) ? "LEGACY" :
+	    (ddt->ddt_version == 1) ? "FDT" : "UNKNOWN",
+	    (u_longlong_t)ddt->ddt_flags, flagstr,
+	    (u_longlong_t)ddt->ddt_dir_object);
+
+	for (ddt_type_t type = 0; type < DDT_TYPES; type++)
+		for (ddt_class_t class = 0; class < DDT_CLASSES; class++)
+			dump_ddt_object(ddt, type, class);
+
+	dump_ddt_log(ddt);
+}
+
 static void
 dump_all_ddts(spa_t *spa)
 {
 	ddt_histogram_t ddh_total = {{{0}}};
 	ddt_stat_t dds_total = {0};

-	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
-		ddt_t *ddt = spa->spa_ddt[c];
-		if (!ddt || ddt->ddt_version == DDT_VERSION_UNCONFIGURED)
-			continue;
-		for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
-			for (ddt_class_t class = 0; class < DDT_CLASSES;
-			    class++) {
-				dump_ddt(ddt, type, class);
-			}
-		}
-		dump_ddt_log(ddt);
-	}
+	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++)
+		dump_ddt(spa->spa_ddt[c]);

 	ddt_get_dedup_stats(spa, &dds_total);

@@ -6892,7 +6953,7 @@ iterate_deleted_livelists(spa_t *spa, ll_iter_t func, void *arg)
 	for (zap_cursor_init(&zc, mos, zap_obj);
 	    zap_cursor_retrieve(&zc, attrp) == 0;
 	    (void) zap_cursor_advance(&zc)) {
-		dsl_deadlist_open(&ll, mos, attrp->za_first_integer);
+		VERIFY0(dsl_deadlist_open(&ll, mos, attrp->za_first_integer));
 		func(&ll, arg);
 		dsl_deadlist_close(&ll);
 	}
diff --git a/cmd/zdb/zdb_il.c b/cmd/zdb/zdb_il.c
index 80d81c1154ae..d0a9bf1aacb6 100644
--- a/cmd/zdb/zdb_il.c
+++ b/cmd/zdb/zdb_il.c
@@ -67,19 +67,19 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, const void *arg)
 	const lr_create_t *lrc = arg;
 	const _lr_create_t *lr = &lrc->lr_create;
 	time_t crtime = lr->lr_crtime[0];
-	char *name, *link;
+	const char *name, *link;
 	lr_attr_t *lrattr;

-	name = (char *)(lr + 1);
+	name = (const char *)&lrc->lr_data[0];

 	if (lr->lr_common.lrc_txtype == TX_CREATE_ATTR ||
 	    lr->lr_common.lrc_txtype == TX_MKDIR_ATTR) {
-		lrattr = (lr_attr_t *)(lr + 1);
+		lrattr = (lr_attr_t *)&lrc->lr_data[0];
 		name += ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
 	}

 	if (txtype == TX_SYMLINK) {
-		link = name + strlen(name) + 1;
+		link = (const char *)&lrc->lr_data[strlen(name) + 1];
 		(void) printf("%s%s -> %s\n", tab_prefix, name, link);
 	} else if (txtype != TX_MKXATTR) {
 		(void) printf("%s%s\n", tab_prefix, name);
@@ -104,7 +104,7 @@ zil_prt_rec_remove(zilog_t *zilog, int txtype, const void *arg)
 	const lr_remove_t *lr = arg;

 	(void) printf("%sdoid %llu, name %s\n", tab_prefix,
-	    (u_longlong_t)lr->lr_doid, (char *)(lr + 1));
+	    (u_longlong_t)lr->lr_doid, (const char *)&lr->lr_data[0]);
 }

 static void
@@ -115,7 +115,7 @@ zil_prt_rec_link(zilog_t *zilog, int txtype, const void *arg)

 	(void) printf("%sdoid %llu, link_obj %llu, name %s\n", tab_prefix,
 	    (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_link_obj,
-	    (char *)(lr + 1));
+	    (const char *)&lr->lr_data[0]);
 }

 static void
@@ -124,8 +124,8 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, const void *arg)
 	(void) zilog, (void) txtype;
 	const lr_rename_t *lrr = arg;
 	const _lr_rename_t *lr = &lrr->lr_rename;
-	char *snm = (char *)(lr + 1);
-	char *tnm = snm + strlen(snm) + 1;
+	const char *snm = (const char *)&lrr->lr_data[0];
+	const char *tnm = (const char *)&lrr->lr_data[strlen(snm) + 1];

 	(void) printf("%ssdoid %llu, tdoid %llu\n", tab_prefix,
 	    (u_longlong_t)lr->lr_sdoid, (u_longlong_t)lr->lr_tdoid);
@@ -211,7 +211,7 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, const void *arg)

 		/* data is stored after the end of the lr_write record */
 		data = abd_alloc(lr->lr_length, B_FALSE);
-		abd_copy_from_buf(data, lr + 1, lr->lr_length);
+		abd_copy_from_buf(data, &lr->lr_data[0], lr->lr_length);
 	}

 	(void) printf("%s", tab_prefix);
@@ -309,7 +309,7 @@ zil_prt_rec_setsaxattr(zilog_t *zilog, int txtype, const void *arg)
 	(void) zilog, (void) txtype;
 	const lr_setsaxattr_t *lr = arg;

-	char *name = (char *)(lr + 1);
+	const char *name = (const char *)&lr->lr_data[0];
 	(void) printf("%sfoid %llu\n", tab_prefix,
 	    (u_longlong_t)lr->lr_foid);

@@ -318,7 +318,7 @@ zil_prt_rec_setsaxattr(zilog_t *zilog, int txtype, const void *arg)
 		(void) printf("%sXAT_VALUE NULL\n", tab_prefix);
 	} else {
 		(void) printf("%sXAT_VALUE ", tab_prefix);
-		char *val = name + (strlen(name) + 1);
+		const char *val = (const char *)&lr->lr_data[strlen(name) + 1];
 		for (int i = 0; i < lr->lr_size; i++) {
 			(void) printf("%c", *val);
 			val++;
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
index 4458b902de31..506427a10672 100644
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -512,7 +512,8 @@ get_usage(zpool_help_t idx)
 		return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
 		    "[<device> ...]\n"));
 	case HELP_SCRUB:
-		return (gettext("\tscrub [-s | -p] [-w] [-e] <pool> ...\n"));
+		return (gettext("\tscrub [-e | -s | -p | -C] [-w] "
+		    "<pool> ...\n"));
 	case HELP_RESILVER:
 		return (gettext("\tresilver <pool> ...\n"));
 	case HELP_TRIM:
@@ -8429,12 +8430,13 @@ wait_callback(zpool_handle_t *zhp, void *data)
 }

 /*
- * zpool scrub [-s | -p] [-w] [-e] <pool> ...
+ * zpool scrub [-e | -s | -p | -C] [-w] <pool> ...
 *
 * -e	Only scrub blocks in the error log.
 * -s	Stop.  Stops any in-progress scrub.
 * -p	Pause. Pause in-progress scrub.
 * -w	Wait.  Blocks until scrub has completed.
+ * -C	Scrub from last saved txg.
 */
 int
 zpool_do_scrub(int argc, char **argv)
@@ -8450,9 +8452,10 @@ zpool_do_scrub(int argc, char **argv)
 	boolean_t is_error_scrub = B_FALSE;
 	boolean_t is_pause = B_FALSE;
 	boolean_t is_stop = B_FALSE;
+	boolean_t is_txg_continue = B_FALSE;

 	/* check options */
-	while ((c = getopt(argc, argv, "spwe")) != -1) {
+	while ((c = getopt(argc, argv, "spweC")) != -1) {
 		switch (c) {
 		case 'e':
 			is_error_scrub = B_TRUE;
@@ -8466,6 +8469,9 @@ zpool_do_scrub(int argc, char **argv)
 		case 'w':
 			wait = B_TRUE;
 			break;
+		case 'C':
+			is_txg_continue = B_TRUE;
+			break;
 		case '?':
 			(void) fprintf(stderr, gettext("invalid option '%c'\n"),
 			    optopt);
@@ -8477,6 +8483,18 @@ zpool_do_scrub(int argc, char **argv)
 		(void) fprintf(stderr, gettext("invalid option "
 		    "combination :-s and -p are mutually exclusive\n"));
 		usage(B_FALSE);
+	} else if (is_pause && is_txg_continue) {
+		(void) fprintf(stderr, gettext("invalid option "
+		    "combination :-p and -C are mutually exclusive\n"));
+		usage(B_FALSE);
+	} else if (is_stop && is_txg_continue) {
+		(void) fprintf(stderr, gettext("invalid option "
+		    "combination :-s and -C are mutually exclusive\n"));
+		usage(B_FALSE);
+	} else if (is_error_scrub && is_txg_continue) {
+		(void) fprintf(stderr, gettext("invalid option "
+		    "combination :-e and -C are mutually exclusive\n"));
+		usage(B_FALSE);
 	} else {
 		if (is_error_scrub)
 			cb.cb_type = POOL_SCAN_ERRORSCRUB;
@@ -8485,6 +8503,8 @@ zpool_do_scrub(int argc, char **argv)
 			cb.cb_scrub_cmd = POOL_SCRUB_PAUSE;
 		} else if (is_stop) {
 			cb.cb_type = POOL_SCAN_NONE;
+		} else if (is_txg_continue) {
+			cb.cb_scrub_cmd = POOL_SCRUB_FROM_LAST_TXG;
 		} else {
 			cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
 		}
@@ -10034,9 +10054,8 @@ print_removal_status(zpool_handle_t *zhp, pool_removal_stat_t *prs)
 		(void) printf(gettext("Removal of %s canceled on %s"),
 		    vdev_name, ctime(&end));
 	} else {
-		uint64_t copied, total, elapsed, mins_left, hours_left;
+		uint64_t copied, total, elapsed, rate, mins_left, hours_left;
 		double fraction_done;
-		uint_t rate;

 		assert(prs->prs_state == DSS_SCANNING);
@@ -10132,9 +10151,8 @@ print_raidz_expand_status(zpool_handle_t *zhp, pool_raidz_expand_stat_t *pres)
 		    copied_buf, time_buf, ctime((time_t *)&end));
 	} else {
 		char examined_buf[7], total_buf[7], rate_buf[7];
-		uint64_t copied, total, elapsed, secs_left;
+		uint64_t copied, total, elapsed, rate, secs_left;
 		double fraction_done;
-		uint_t rate;

 		assert(pres->pres_state == DSS_SCANNING);
diff --git a/config/kernel-register_sysctl_table.m4 b/config/kernel-register_sysctl_table.m4
index 12ffe9d95142..8dc17e2d42f9 100644
--- a/config/kernel-register_sysctl_table.m4
+++ b/config/kernel-register_sysctl_table.m4
@@ -36,7 +36,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_SZ], [
 	ZFS_LINUX_TEST_SRC([has_register_sysctl_sz], [
 		#include <linux/sysctl.h>
 	],[
-		struct ctl_table test_table[] __attribute__((unused)) = {0};
+		struct ctl_table test_table[] __attribute__((unused)) = {{}};
 		register_sysctl_sz("", test_table, 0);
 	])
 ])
diff --git a/include/os/freebsd/spl/sys/debug.h b/include/os/freebsd/spl/sys/debug.h
index 615f97351ec4..fd22e6b001dc 100644
--- a/include/os/freebsd/spl/sys/debug.h
+++ b/include/os/freebsd/spl/sys/debug.h
@@ -105,7 +105,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
 	    __FILE__, __FUNCTION__, __LINE__))

 #define	VERIFYF(cond, str, ...)	do {	\
-		if (unlikely(!cond))	\
+		if (unlikely(!(cond)))	\
			spl_panic(__FILE__, __FUNCTION__, __LINE__,	\
			    "VERIFY(" #cond ") failed " str "\n", __VA_ARGS__);\
	} while (0)
diff --git a/include/os/freebsd/spl/sys/sdt.h b/include/os/freebsd/spl/sys/sdt.h
index e2c4830cb964..aa3688718ae7 100644
--- a/include/os/freebsd/spl/sys/sdt.h
+++ b/include/os/freebsd/spl/sys/sdt.h
@@ -31,9 +31,9 @@
 #include_next <sys/sdt.h>

 #ifdef KDTRACE_HOOKS
-/* BEGIN CSTYLED */
 SDT_PROBE_DECLARE(sdt, , , set__error);

+/* BEGIN CSTYLED */
 #define	SET_ERROR(err) ({	\
	SDT_PROBE1(sdt, , , set__error, (uintptr_t)err);	\
	err;	\
 })
diff --git a/include/os/freebsd/spl/sys/vnode.h b/include/os/freebsd/spl/sys/vnode.h
index 76ea3eff3792..b9d3e81d0812 100644
--- a/include/os/freebsd/spl/sys/vnode.h
+++ b/include/os/freebsd/spl/sys/vnode.h
@@ -68,47 +68,30 @@ enum symfollow { NO_FOLLOW = NOFOLLOW };
 #include
 typedef struct vop_vector vnodeops_t;
-#define	VOP_FID		VOP_VPTOFH
 #define	vop_fid		vop_vptofh
 #define	vop_fid_args	vop_vptofh_args
 #define	a_fid		a_fhp

-#define	rootvfs		(rootvnode == NULL ? NULL : rootvnode->v_mount)
-
-#ifndef IN_BASE
-static __inline int
-vn_is_readonly(vnode_t *vp)
-{
-	return (vp->v_mount->mnt_flag & MNT_RDONLY);
-}
-#endif
 #define	vn_vfswlock(vp)		(0)
 #define	vn_vfsunlock(vp)	do { } while (0)
-#define	vn_ismntpt(vp)	\
-	((vp)->v_type == VDIR && (vp)->v_mountedhere != NULL)
-#define	vn_mountedvfs(vp)	((vp)->v_mountedhere)
+
+#ifndef IN_BASE
 #define	vn_has_cached_data(vp)	\
 	((vp)->v_object != NULL && \
 	(vp)->v_object->resident_page_count > 0)
-#ifndef IN_BASE
 static __inline void
 vn_flush_cached_data(vnode_t *vp, boolean_t sync)
 {
 	if (vm_object_mightbedirty(vp->v_object)) {
 		int flags = sync ? OBJPC_SYNC : 0;
-		vn_lock(vp, LK_SHARED | LK_RETRY);
 		zfs_vmobject_wlock(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, flags);
 		zfs_vmobject_wunlock(vp->v_object);
-		VOP_UNLOCK(vp);
 	}
 }
 #endif

-#define	vn_exists(vp)	do { } while (0)
-#define	vn_invalid(vp)	do { } while (0)
-#define	vn_free(vp)	do { } while (0)
 #define	vn_matchops(vp, vops)	((vp)->v_op == &(vops))

 #define	VN_HOLD(v)	vref(v)
@@ -123,9 +106,6 @@ vn_flush_cached_data(vnode_t *vp, boolean_t sync)
 #define	vnevent_rename_dest(vp, dvp, name, ct)	do { } while (0)
 #define	vnevent_rename_dest_dir(vp, ct)		do { } while (0)

-#define	specvp(vp, rdev, type, cr)	(VN_HOLD(vp), (vp))
-#define	MANDLOCK(vp, mode)	(0)
-
 /*
 * We will use va_spare is place of Solaris' va_mask.
 * This field is initialized in zfs_setattr().
diff --git a/include/os/linux/spl/sys/debug.h b/include/os/linux/spl/sys/debug.h
index 38cc57ae0ca7..3459d6979fe8 100644
--- a/include/os/linux/spl/sys/debug.h
+++ b/include/os/linux/spl/sys/debug.h
@@ -109,7 +109,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line)
 	    __FILE__, __FUNCTION__, __LINE__))

 #define	VERIFYF(cond, str, ...)	do {	\
-		if (unlikely(!cond))	\
+		if (unlikely(!(cond)))	\
			spl_panic(__FILE__, __FUNCTION__, __LINE__,	\
			    "VERIFY(" #cond ") failed " str "\n", __VA_ARGS__);\
	} while (0)
diff --git a/include/sys/arc.h b/include/sys/arc.h
index 5148905c93d8..0e0a828c1d93 100644
--- a/include/sys/arc.h
+++ b/include/sys/arc.h
@@ -63,8 +63,15 @@ extern "C" {
 	(hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT);	\
 } while (0)

+/* The l2size in the header is only used by L2 cache */
+#define	HDR_SET_L2SIZE(hdr, x) do { \
+	ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
+	(hdr)->b_l2size = ((x) >> SPA_MINBLOCKSHIFT); \
+} while (0)
+
 #define	HDR_GET_LSIZE(hdr)	((hdr)->b_lsize << SPA_MINBLOCKSHIFT)
 #define	HDR_GET_PSIZE(hdr)	((hdr)->b_psize << SPA_MINBLOCKSHIFT)
+#define	HDR_GET_L2SIZE(hdr)	((hdr)->b_l2size << SPA_MINBLOCKSHIFT)

 typedef struct arc_buf_hdr arc_buf_hdr_t;
 typedef struct arc_buf arc_buf_t;
@@ -322,8 +329,10 @@ void arc_freed(spa_t *spa, const blkptr_t *bp);
 int arc_cached(spa_t *spa, const blkptr_t *bp);

 void arc_flush(spa_t *spa, boolean_t retry);
+void arc_flush_async(spa_t *spa);
 void arc_tempreserve_clear(uint64_t reserve);
 int arc_tempreserve_space(spa_t *spa, uint64_t reserve, uint64_t txg);
+boolean_t arc_async_flush_guid_inuse(uint64_t load_guid);

 uint64_t arc_all_memory(void);
 uint64_t arc_default_max(uint64_t min, uint64_t allmem);
diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h
index b2839bdf1485..2cf6aa3b1825 100644
--- a/include/sys/arc_impl.h
+++ b/include/sys/arc_impl.h
@@ -378,8 +378,8 @@ typedef struct l2arc_lb_ptr_buf {
 * L2ARC Internals
 */
 typedef struct l2arc_dev {
-	vdev_t			*l2ad_vdev;	/* vdev */
-	spa_t			*l2ad_spa;	/* spa */
+	vdev_t			*l2ad_vdev;	/* can be NULL during remove */
+	spa_t			*l2ad_spa;	/* can be NULL during remove */
 	uint64_t		l2ad_hand;	/* next write location */
 	uint64_t		l2ad_start;	/* first addr on device */
 	uint64_t		l2ad_end;	/* last addr on device */
@@ -475,8 +475,8 @@ struct arc_buf_hdr {
 	arc_buf_contents_t	b_type;
 	uint8_t			b_complevel;
-	uint8_t			b_reserved1;	/* used for 4 byte alignment */
-	uint16_t		b_reserved2;	/* used for 4 byte alignment */
+	uint8_t			b_reserved1;	/* used for 4 byte alignment */
+	uint16_t		b_l2size;	/* alignment or L2-only size */
 	arc_buf_hdr_t		*b_hash_next;
 	arc_flags_t		b_flags;
diff --git a/include/sys/dmu.h b/include/sys/dmu.h
index 22cbd7fc73b6..29f715039d29 100644
--- a/include/sys/dmu.h
+++ b/include/sys/dmu.h
@@ -381,6 +381,7 @@ typedef struct dmu_buf {
 #define	DMU_POOL_CREATION_VERSION	"creation_version"
 #define	DMU_POOL_SCAN			"scan"
 #define	DMU_POOL_ERRORSCRUB		"error_scrub"
+#define	DMU_POOL_LAST_SCRUBBED_TXG	"last_scrubbed_txg"
 #define	DMU_POOL_FREE_BPOBJ		"free_bpobj"
 #define	DMU_POOL_BPTREE_OBJ		"bptree_obj"
 #define	DMU_POOL_EMPTY_BPOBJ		"empty_bpobj"
diff --git a/include/sys/dsl_deadlist.h b/include/sys/dsl_deadlist.h
index 3feb3bbf062f..798f9e3f6245 100644
--- a/include/sys/dsl_deadlist.h
+++ b/include/sys/dsl_deadlist.h
@@ -89,7 +89,7 @@ extern int zfs_livelist_min_percent_shared;

 typedef int deadlist_iter_t(void *args, dsl_deadlist_entry_t *dle);

-void dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object);
+int dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object);
 void dsl_deadlist_close(dsl_deadlist_t *dl);
 void dsl_deadlist_iterate(dsl_deadlist_t *dl, deadlist_iter_t func, void *arg);
 uint64_t dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx);
diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
index f7c0d9acd10d..04ea7a8f293b 100644
--- a/include/sys/dsl_dir.h
+++ b/include/sys/dsl_dir.h
@@ -198,7 +198,7 @@ void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
     dmu_tx_t *tx);
 void dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx);
 boolean_t dsl_dir_is_zapified(dsl_dir_t *dd);
-void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj);
+int dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj);
 void dsl_dir_livelist_close(dsl_dir_t *dd);
 void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total);
 int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
diff --git a/include/sys/dsl_scan.h b/include/sys/dsl_scan.h
index 63734dbc176f..ef181c3ff2cd 100644
--- a/include/sys/dsl_scan.h
+++ b/include/sys/dsl_scan.h
@@ -179,6 +179,12 @@ typedef struct dsl_scan {
 	dsl_errorscrub_phys_t errorscrub_phys;
 } dsl_scan_t;

+typedef struct {
+	pool_scan_func_t func;
+	uint64_t txgstart;
+	uint64_t txgend;
+} setup_sync_arg_t;
+
 typedef struct dsl_scan_io_queue dsl_scan_io_queue_t;

 void scan_init(void);
@@ -189,7 +195,8 @@ void dsl_scan_setup_sync(void *, dmu_tx_t *);
 void dsl_scan_fini(struct dsl_pool *dp);
 void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
 int dsl_scan_cancel(struct dsl_pool *);
-int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+int dsl_scan(struct dsl_pool *, pool_scan_func_t, uint64_t starttxg,
+    uint64_t txgend);
 void dsl_scan_assess_vdev(struct dsl_pool *dp, vdev_t *vd);
 boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
 boolean_t dsl_errorscrubbing(const struct dsl_pool *dp);
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 1676020d04d3..dc474e3739f3 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -265,6 +265,7 @@ typedef enum {
 	ZPOOL_PROP_DEDUP_TABLE_SIZE,
 	ZPOOL_PROP_DEDUP_TABLE_QUOTA,
 	ZPOOL_PROP_DEDUPCACHED,
+	ZPOOL_PROP_LAST_SCRUBBED_TXG,
 	ZPOOL_NUM_PROPS
 } zpool_prop_t;

@@ -1088,6 +1089,7 @@ typedef enum pool_scan_func {
 typedef enum pool_scrub_cmd {
 	POOL_SCRUB_NORMAL = 0,
 	POOL_SCRUB_PAUSE,
+	POOL_SCRUB_FROM_LAST_TXG,
 	POOL_SCRUB_FLAGS_END
 } pool_scrub_cmd_t;
diff --git a/include/sys/spa.h b/include/sys/spa.h
index 52601921fc3c..23c20294d1f8 100644
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -822,6 +822,8 @@ extern void spa_l2cache_drop(spa_t *spa);

 /* scanning */
 extern int spa_scan(spa_t *spa, pool_scan_func_t func);
+extern int spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart,
+    uint64_t txgend);
 extern int spa_scan_stop(spa_t *spa);
 extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
@@ -1080,6 +1082,7 @@ extern uint64_t spa_get_deadman_failmode(spa_t *spa);
 extern void spa_set_deadman_failmode(spa_t *spa, const char *failmode);
 extern boolean_t spa_suspended(spa_t *spa);
 extern uint64_t spa_bootfs(spa_t *spa);
+extern uint64_t spa_get_last_scrubbed_txg(spa_t *spa);
 extern uint64_t spa_delegation(spa_t *spa);
 extern objset_t *spa_meta_objset(spa_t *spa);
 extern space_map_t *spa_syncing_log_sm(spa_t *spa);
@@ -1103,6 +1106,7 @@ extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
 extern char *spa_strdup(const char *);
 extern void spa_strfree(char *);
 extern uint64_t spa_generate_guid(spa_t *spa);
+extern uint64_t spa_generate_load_guid(void);
 extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp);
 extern void spa_freeze(spa_t *spa);
 extern int spa_change_guid(spa_t *spa, const uint64_t *guidp);
diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
index d1da87105103..b0a2d46ff2c4 100644
--- a/include/sys/spa_impl.h
+++ b/include/sys/spa_impl.h
@@ -318,6 +318,7 @@ struct spa {
 	uint64_t	spa_scan_pass_scrub_spent_paused; /* total paused */
 	uint64_t	spa_scan_pass_exam;	/* examined bytes per pass */
 	uint64_t	spa_scan_pass_issued;	/* issued bytes per pass */
+	uint64_t	spa_scrubbed_last_txg;	/* last txg scrubbed */

 	/* error scrub pause time in milliseconds */
 	uint64_t	spa_scan_pass_errorscrub_pause;
diff --git a/lib/libspl/atomic.c b/lib/libspl/atomic.c
index 8cc350710ba0..f61f5fcc47f5 100644
--- a/lib/libspl/atomic.c
+++ b/lib/libspl/atomic.c
@@ -35,7 +35,6 @@
 	(void) __atomic_add_fetch(target, 1, __ATOMIC_SEQ_CST);	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_INC(8, uint8_t)
 ATOMIC_INC(16, uint16_t)
 ATOMIC_INC(32, uint32_t)
@@ -44,7 +43,6 @@ ATOMIC_INC(uchar, uchar_t)
 ATOMIC_INC(ushort, ushort_t)
 ATOMIC_INC(uint, uint_t)
 ATOMIC_INC(ulong, ulong_t)
-/* END CSTYLED */

 #define	ATOMIC_DEC(name, type) \
@@ -53,7 +51,6 @@ ATOMIC_INC(ulong, ulong_t)
 	(void) __atomic_sub_fetch(target, 1, __ATOMIC_SEQ_CST);	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_DEC(8, uint8_t)
 ATOMIC_DEC(16, uint16_t)
 ATOMIC_DEC(32, uint32_t)
@@ -62,7 +59,6 @@ ATOMIC_DEC(uchar, uchar_t)
 ATOMIC_DEC(ushort, ushort_t)
 ATOMIC_DEC(uint, uint_t)
 ATOMIC_DEC(ulong, ulong_t)
-/* END CSTYLED */

 #define	ATOMIC_ADD(name, type1, type2) \
@@ -77,7 +73,6 @@ atomic_add_ptr(volatile void *target, ssize_t bits)
	(void) __atomic_add_fetch((void **)target, bits, __ATOMIC_SEQ_CST);
 }

-/* BEGIN CSTYLED */
 ATOMIC_ADD(8, uint8_t, int8_t)
 ATOMIC_ADD(16, uint16_t, int16_t)
 ATOMIC_ADD(32, uint32_t, int32_t)
@@ -86,7 +81,6 @@ ATOMIC_ADD(char, uchar_t, signed char)
 ATOMIC_ADD(short, ushort_t, short)
 ATOMIC_ADD(int, uint_t, int)
 ATOMIC_ADD(long, ulong_t, long)
-/* END CSTYLED */

 #define	ATOMIC_SUB(name, type1, type2) \
@@ -101,7 +95,6 @@ atomic_sub_ptr(volatile void *target, ssize_t bits)
	(void) __atomic_sub_fetch((void **)target, bits, __ATOMIC_SEQ_CST);
 }

-/* BEGIN CSTYLED */
 ATOMIC_SUB(8, uint8_t, int8_t)
 ATOMIC_SUB(16, uint16_t, int16_t)
 ATOMIC_SUB(32, uint32_t, int32_t)
@@ -110,7 +103,6 @@ ATOMIC_SUB(char, uchar_t, signed char)
 ATOMIC_SUB(short, ushort_t, short)
 ATOMIC_SUB(int, uint_t, int)
 ATOMIC_SUB(long, ulong_t, long)
-/* END CSTYLED */

 #define	ATOMIC_OR(name, type) \
@@ -119,7 +111,6 @@ ATOMIC_SUB(long, ulong_t, long)
 	(void) __atomic_or_fetch(target, bits, __ATOMIC_SEQ_CST);	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_OR(8, uint8_t)
 ATOMIC_OR(16, uint16_t)
 ATOMIC_OR(32, uint32_t)
@@ -128,7 +119,6 @@ ATOMIC_OR(uchar, uchar_t)
 ATOMIC_OR(ushort, ushort_t)
 ATOMIC_OR(uint, uint_t)
 ATOMIC_OR(ulong, ulong_t)
-/* END CSTYLED */

 #define	ATOMIC_AND(name, type) \
@@ -137,7 +127,6 @@ ATOMIC_OR(ulong, ulong_t)
 	(void) __atomic_and_fetch(target, bits, __ATOMIC_SEQ_CST);	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_AND(8, uint8_t)
 ATOMIC_AND(16, uint16_t)
 ATOMIC_AND(32, uint32_t)
@@ -146,7 +135,6 @@ ATOMIC_AND(uchar, uchar_t)
 ATOMIC_AND(ushort, ushort_t)
 ATOMIC_AND(uint, uint_t)
 ATOMIC_AND(ulong, ulong_t)
-/* END CSTYLED */

 /*
@@ -159,7 +147,6 @@ ATOMIC_AND(ulong, ulong_t)
 	return (__atomic_add_fetch(target, 1, __ATOMIC_SEQ_CST));	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_INC_NV(8, uint8_t)
 ATOMIC_INC_NV(16, uint16_t)
 ATOMIC_INC_NV(32, uint32_t)
@@ -168,7 +155,6 @@ ATOMIC_INC_NV(uchar, uchar_t)
 ATOMIC_INC_NV(ushort, ushort_t)
 ATOMIC_INC_NV(uint, uint_t)
 ATOMIC_INC_NV(ulong, ulong_t)
-/* END CSTYLED */

 #define	ATOMIC_DEC_NV(name, type) \
@@ -177,7 +163,6 @@ ATOMIC_INC_NV(ulong, ulong_t)
 	return (__atomic_sub_fetch(target, 1, __ATOMIC_SEQ_CST));	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_DEC_NV(8, uint8_t)
 ATOMIC_DEC_NV(16, uint16_t)
 ATOMIC_DEC_NV(32, uint32_t)
@@ -186,7 +171,6 @@ ATOMIC_DEC_NV(uchar, uchar_t)
 ATOMIC_DEC_NV(ushort, ushort_t)
 ATOMIC_DEC_NV(uint, uint_t)
 ATOMIC_DEC_NV(ulong, ulong_t)
-/* END CSTYLED */

 #define	ATOMIC_ADD_NV(name, type1, type2) \
@@ -201,7 +185,6 @@ atomic_add_ptr_nv(volatile void *target, ssize_t bits)
	return (__atomic_add_fetch((void **)target, bits, __ATOMIC_SEQ_CST));
 }

-/* BEGIN CSTYLED */
 ATOMIC_ADD_NV(8, uint8_t, int8_t)
 ATOMIC_ADD_NV(16, uint16_t, int16_t)
 ATOMIC_ADD_NV(32, uint32_t, int32_t)
@@ -210,7 +193,6 @@ ATOMIC_ADD_NV(char, uchar_t, signed char)
 ATOMIC_ADD_NV(short, ushort_t, short)
 ATOMIC_ADD_NV(int, uint_t, int)
 ATOMIC_ADD_NV(long, ulong_t, long)
-/* END CSTYLED */

 #define	ATOMIC_SUB_NV(name, type1, type2) \
@@ -225,7 +207,6 @@ atomic_sub_ptr_nv(volatile void *target, ssize_t bits)
	return (__atomic_sub_fetch((void **)target, bits, __ATOMIC_SEQ_CST));
 }

-/* BEGIN CSTYLED */
 ATOMIC_SUB_NV(8, uint8_t, int8_t)
 ATOMIC_SUB_NV(char, uchar_t, signed char)
 ATOMIC_SUB_NV(16, uint16_t, int16_t)
@@ -234,7 +215,6 @@ ATOMIC_SUB_NV(32, uint32_t, int32_t)
 ATOMIC_SUB_NV(int, uint_t, int)
 ATOMIC_SUB_NV(long, ulong_t, long)
 ATOMIC_SUB_NV(64, uint64_t, int64_t)
-/* END CSTYLED */

 #define	ATOMIC_OR_NV(name, type) \
@@ -243,7 +223,6 @@ ATOMIC_SUB_NV(64, uint64_t, int64_t)
 	return (__atomic_or_fetch(target, bits, __ATOMIC_SEQ_CST));	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_OR_NV(8, uint8_t)
 ATOMIC_OR_NV(16, uint16_t)
 ATOMIC_OR_NV(32, uint32_t)
@@ -252,7 +231,6 @@ ATOMIC_OR_NV(uchar, uchar_t)
 ATOMIC_OR_NV(ushort, ushort_t)
 ATOMIC_OR_NV(uint, uint_t)
 ATOMIC_OR_NV(ulong, ulong_t)
-/* END CSTYLED */

 #define	ATOMIC_AND_NV(name, type) \
@@ -261,7 +239,6 @@ ATOMIC_OR_NV(ulong, ulong_t)
 	return (__atomic_and_fetch(target, bits, __ATOMIC_SEQ_CST));	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_AND_NV(8, uint8_t)
 ATOMIC_AND_NV(16, uint16_t)
 ATOMIC_AND_NV(32, uint32_t)
@@ -270,7 +247,6 @@ ATOMIC_AND_NV(uchar, uchar_t)
 ATOMIC_AND_NV(ushort, ushort_t)
 ATOMIC_AND_NV(uint, uint_t)
 ATOMIC_AND_NV(ulong, ulong_t)
-/* END CSTYLED */

 /*
@@ -300,7 +276,6 @@ atomic_cas_ptr(volatile void *target, void *exp, void *des)
	return (exp);
 }

-/* BEGIN CSTYLED */
 ATOMIC_CAS(8, uint8_t)
 ATOMIC_CAS(16, uint16_t)
 ATOMIC_CAS(32, uint32_t)
@@ -309,7 +284,6 @@ ATOMIC_CAS(uchar, uchar_t)
 ATOMIC_CAS(ushort, ushort_t)
 ATOMIC_CAS(uint, uint_t)
 ATOMIC_CAS(ulong, ulong_t)
-/* END CSTYLED */

 /*
@@ -322,7 +296,6 @@ ATOMIC_CAS(ulong, ulong_t)
 	return (__atomic_exchange_n(target, bits, __ATOMIC_SEQ_CST));	\
 }

-/* BEGIN CSTYLED */
 ATOMIC_SWAP(8, uint8_t)
 ATOMIC_SWAP(16, uint16_t)
 ATOMIC_SWAP(32, uint32_t)
@@ -331,7 +304,6 @@ ATOMIC_SWAP(uchar, uchar_t)
 ATOMIC_SWAP(ushort, ushort_t)
 ATOMIC_SWAP(uint, uint_t)
 ATOMIC_SWAP(ulong, ulong_t)
-/* END CSTYLED */

 void *
 atomic_swap_ptr(volatile void *target, void *bits)
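An aside, not part of the patch: each ATOMIC_*() invocation in the atomic.c hunks above stamps out one strongly typed wrapper around a GCC/Clang __atomic builtin, which is why the instantiation lists are so repetitive; dropping the CSTYLED escapes simply lets cstyle check them like ordinary code. Roughly, ATOMIC_ADD(32, uint32_t, int32_t) expands to:

	void
	atomic_add_32(volatile uint32_t *target, int32_t bits)
	{
		/* Sequentially consistent read-modify-write. */
		(void) __atomic_add_fetch(target, bits, __ATOMIC_SEQ_CST);
	}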
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index ac9ae233c72d..1f9fde6677d8 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
@@ -3132,7 +3132,8 @@
-
+
+
@@ -5984,7 +5985,8 @@
-
+
+
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c
index 1f7e7b0e647e..7cc91f984a40 100644
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -932,6 +932,7 @@ libzfs_run_process_impl(const char *path, char *argv[], char *env[], int flags,
 	pid = fork();
 	if (pid == 0) {
 		/* Child process */
+		setpgid(0, 0);
 		devnull_fd = open("/dev/null", O_WRONLY | O_CLOEXEC);

 		if (devnull_fd < 0)
diff --git a/man/man7/zpoolprops.7 b/man/man7/zpoolprops.7
index f4fcc620e4d9..7c0dd4caad3e 100644
--- a/man/man7/zpoolprops.7
+++ b/man/man7/zpoolprops.7
@@ -28,7 +28,7 @@
 .\" Copyright (c) 2021, Colm Buckley
 .\" Copyright (c) 2023, Klara Inc.
 .\"
-.Dd July 29, 2024
+.Dd November 18, 2024
 .Dt ZPOOLPROPS 7
 .Os
 .
@@ -135,6 +135,19 @@ A unique identifier for the pool.
 The current health of the pool.
 Health can be one of
 .Sy ONLINE , DEGRADED , FAULTED , OFFLINE, REMOVED , UNAVAIL .
+.It Sy last_scrubbed_txg
+Indicates the transaction group (TXG) up to which the most recent scrub
+operation has checked and repaired the dataset.
+This provides insight into the data integrity status of the pool at
+a specific point in time.
+.Xr zpool-scrub 8
+can utilize this property to scan only data that has changed since the last
+scrub completed, when given the
+.Fl C
+flag.
+This property is not updated when performing an error scrub with the
+.Fl e
+flag.
 .It Sy leaked
 Space not released while
 .Sy freeing
diff --git a/man/man8/zinject.8 b/man/man8/zinject.8
index ad9e7a42bfac..abccc4d086e0 100644
--- a/man/man8/zinject.8
+++ b/man/man8/zinject.8
@@ -23,7 +23,7 @@
 .\"
 .\" lint-ok: WARNING: sections out of conventional order: Sh SYNOPSIS
 .\"
-.Dd April 4, 2024
+.Dd December 2, 2024
 .Dt ZINJECT 8
 .Os
 .
@@ -268,7 +268,7 @@ Run for this many seconds before reporting failure.
 .It Fl T Ar failure
 Set the failure type to one of
 .Sy all ,
-.Sy ioctl ,
+.Sy flush ,
 .Sy claim ,
 .Sy free ,
 .Sy read ,
diff --git a/man/man8/zpool-scrub.8 b/man/man8/zpool-scrub.8
index 03f3ad4991f9..676286b038da 100644
--- a/man/man8/zpool-scrub.8
+++ b/man/man8/zpool-scrub.8
@@ -26,7 +26,7 @@
 .\" Copyright 2017 Nexenta Systems, Inc.
 .\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
 .\"
-.Dd June 22, 2023
+.Dd November 18, 2024
 .Dt ZPOOL-SCRUB 8
 .Os
 .
@@ -36,9 +36,8 @@
 .Sh SYNOPSIS
 .Nm zpool
 .Cm scrub
-.Op Fl s Ns | Ns Fl p
+.Op Fl e Ns | Ns Fl p Ns | Ns Fl s Ns | Ns Fl C
 .Op Fl w
-.Op Fl e
 .Ar pool Ns …
 .
 .Sh DESCRIPTION
@@ -114,6 +113,10 @@ The pool must have been scrubbed at least once with the
 feature enabled to use this option.
 Error scrubbing cannot be run simultaneously with regular scrubbing or
 resilvering, nor can it be run when a regular scrub is paused.
+.It Fl C
+Continue scrub from last saved txg (see zpool
+.Sy last_scrubbed_txg
+property).
 .El
 .Sh EXAMPLES
 .Ss Example 1
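An aside, not part of the patch: the workflow the two man-page additions above describe, with a hypothetical pool name.

	# Scrub only blocks born after the TXG the last completed scrub
	# of "tank" reached, then read back the saved position.
	zpool scrub -C tank
	zpool get last_scrubbed_txg tank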
\ #endif -/* BEGIN CSTYLED */ NVS_BUILD_XDRPROC_T(char); NVS_BUILD_XDRPROC_T(short); NVS_BUILD_XDRPROC_T(u_short); @@ -3289,7 +3288,6 @@ NVS_BUILD_XDRPROC_T(int); NVS_BUILD_XDRPROC_T(u_int); NVS_BUILD_XDRPROC_T(longlong_t); NVS_BUILD_XDRPROC_T(u_longlong_t); -/* END CSTYLED */ /* * The format of xdr encoded nvpair is: diff --git a/module/os/freebsd/spl/spl_dtrace.c b/module/os/freebsd/spl/spl_dtrace.c index 4b9cc65d641e..0a2fcf110d7b 100644 --- a/module/os/freebsd/spl/spl_dtrace.c +++ b/module/os/freebsd/spl/spl_dtrace.c @@ -31,5 +31,4 @@ #include #include -/* CSTYLED */ SDT_PROBE_DEFINE1(sdt, , , set__error, "int"); diff --git a/module/os/freebsd/zfs/dmu_os.c b/module/os/freebsd/zfs/dmu_os.c index 0a0af102ea82..370ce2d806e8 100644 --- a/module/os/freebsd/zfs/dmu_os.c +++ b/module/os/freebsd/zfs/dmu_os.c @@ -103,6 +103,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, db->db_offset + bufoff); thiscpy = MIN(PAGESIZE, tocpy - copied); va = zfs_map_page(*ma, &sf); + ASSERT(db->db_data != NULL); memcpy((char *)db->db_data + bufoff, va, thiscpy); zfs_unmap_page(sf); ma += 1; @@ -172,6 +173,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, ASSERT3U(db->db_size, >, PAGE_SIZE); bufoff = IDX_TO_OFF(m->pindex) % db->db_size; va = zfs_map_page(m, &sf); + ASSERT(db->db_data != NULL); memcpy(va, (char *)db->db_data + bufoff, PAGESIZE); zfs_unmap_page(sf); vm_page_valid(m); @@ -211,8 +213,10 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, */ tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); ASSERT3S(tocpy, >=, 0); - if (m != bogus_page) + if (m != bogus_page) { + ASSERT(db->db_data != NULL); memcpy(va + pgoff, (char *)db->db_data + bufoff, tocpy); + } pgoff += tocpy; ASSERT3S(pgoff, >=, 0); @@ -290,6 +294,7 @@ dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, bufoff = IDX_TO_OFF(m->pindex) % db->db_size; tocpy = MIN(db->db_size - bufoff, PAGESIZE); va = zfs_map_page(m, &sf); + ASSERT(db->db_data != NULL); memcpy(va, (char *)db->db_data + bufoff, tocpy); if (tocpy < PAGESIZE) { ASSERT3S(i, ==, *rahead - 1); diff --git a/module/os/freebsd/zfs/sysctl_os.c b/module/os/freebsd/zfs/sysctl_os.c index c84cb7407a9c..7350b8a6d49f 100644 --- a/module/os/freebsd/zfs/sysctl_os.c +++ b/module/os/freebsd/zfs/sysctl_os.c @@ -187,12 +187,10 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS) return (0); } -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 0, param_set_arc_max, "LU", "Maximum ARC size in bytes (LEGACY)"); -/* END CSTYLED */ int param_set_arc_min(SYSCTL_HANDLER_ARGS) @@ -218,12 +216,10 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS) return (0); } -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 0, param_set_arc_min, "LU", "Minimum ARC size in bytes (LEGACY)"); -/* END CSTYLED */ extern uint_t zfs_arc_free_target; @@ -252,13 +248,11 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS) * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on * pagedaemon initialization. 
*/ -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, param_set_arc_free_target, "IU", "Desired number of free pages below which ARC triggers reclaim" " (LEGACY)"); -/* END CSTYLED */ int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) @@ -278,84 +272,64 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) return (0); } -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 0, param_set_arc_no_grow_shift, "I", "log2(fraction of ARC which must be free to allow growing) (LEGACY)"); -/* END CSTYLED */ extern uint64_t l2arc_write_max; -/* BEGIN CSTYLED */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RWTUN, &l2arc_write_max, 0, "Max write bytes per interval (LEGACY)"); -/* END CSTYLED */ extern uint64_t l2arc_write_boost; -/* BEGIN CSTYLED */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RWTUN, &l2arc_write_boost, 0, "Extra write bytes during device warmup (LEGACY)"); -/* END CSTYLED */ extern uint64_t l2arc_headroom; -/* BEGIN CSTYLED */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RWTUN, &l2arc_headroom, 0, "Number of max device writes to precache (LEGACY)"); -/* END CSTYLED */ extern uint64_t l2arc_headroom_boost; -/* BEGIN CSTYLED */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost, CTLFLAG_RWTUN, &l2arc_headroom_boost, 0, "Compressed l2arc_headroom multiplier (LEGACY)"); -/* END CSTYLED */ extern uint64_t l2arc_feed_secs; -/* BEGIN CSTYLED */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RWTUN, &l2arc_feed_secs, 0, "Seconds between L2ARC writing (LEGACY)"); -/* END CSTYLED */ extern uint64_t l2arc_feed_min_ms; -/* BEGIN CSTYLED */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0, "Min feed interval in milliseconds (LEGACY)"); -/* END CSTYLED */ extern int l2arc_noprefetch; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RWTUN, &l2arc_noprefetch, 0, "Skip caching prefetched buffers (LEGACY)"); -/* END CSTYLED */ extern int l2arc_feed_again; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RWTUN, &l2arc_feed_again, 0, "Turbo L2ARC warmup (LEGACY)"); -/* END CSTYLED */ extern int l2arc_norw; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RWTUN, &l2arc_norw, 0, "No reads during writes (LEGACY)"); -/* END CSTYLED */ static int param_get_arc_state_size(SYSCTL_HANDLER_ARGS) @@ -370,7 +344,6 @@ param_get_arc_state_size(SYSCTL_HANDLER_ARGS) extern arc_state_t ARC_anon; -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size, CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &ARC_anon, 0, param_get_arc_state_size, "Q", @@ -381,11 +354,9 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD, SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD, &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0, "size of evictable data in anonymous state"); -/* END CSTYLED */ extern arc_state_t ARC_mru; -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size, CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &ARC_mru, 0, param_get_arc_state_size, "Q", @@ -396,11 +367,9 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD, SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD, &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0, "size of evictable data in mru state"); -/* END CSTYLED */ extern arc_state_t ARC_mru_ghost; -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, 
OID_AUTO, mru_ghost_size, CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &ARC_mru_ghost, 0, param_get_arc_state_size, "Q", @@ -411,11 +380,9 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD, SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD, &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, "size of evictable data in mru ghost state"); -/* END CSTYLED */ extern arc_state_t ARC_mfu; -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size, CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &ARC_mfu, 0, param_get_arc_state_size, "Q", @@ -426,11 +393,9 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD, SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD, &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0, "size of evictable data in mfu state"); -/* END CSTYLED */ extern arc_state_t ARC_mfu_ghost; -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &ARC_mfu_ghost, 0, param_get_arc_state_size, "Q", @@ -441,11 +406,9 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD, SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD, &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, "size of evictable data in mfu ghost state"); -/* END CSTYLED */ extern arc_state_t ARC_uncached; -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size, CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &ARC_uncached, 0, param_get_arc_state_size, "Q", @@ -456,16 +419,13 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD, SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD, &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0, "size of evictable data in uncached state"); -/* END CSTYLED */ extern arc_state_t ARC_l2c_only; -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size, CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, &ARC_l2c_only, 0, param_get_arc_state_size, "Q", "size of l2c_only state"); -/* END CSTYLED */ /* dbuf.c */ @@ -477,19 +437,15 @@ SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)"); extern uint32_t zfetch_max_distance; -/* BEGIN CSTYLED */ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, CTLFLAG_RWTUN, &zfetch_max_distance, 0, "Max bytes to prefetch per stream (LEGACY)"); -/* END CSTYLED */ extern uint32_t zfetch_max_idistance; -/* BEGIN CSTYLED */ SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, CTLFLAG_RWTUN, &zfetch_max_idistance, 0, "Max bytes to prefetch indirects for per stream (LEGACY)"); -/* END CSTYLED */ /* dsl_pool.c */ @@ -527,12 +483,10 @@ param_set_active_allocator(SYSCTL_HANDLER_ARGS) */ extern int zfs_metaslab_sm_blksz_no_log; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, CTLFLAG_RDTUN, &zfs_metaslab_sm_blksz_no_log, 0, "Block size for space map in pools with log space map disabled. " "Power of 2 greater than 4096."); -/* END CSTYLED */ /* * When the log space map feature is enabled, we accumulate a lot of @@ -541,12 +495,10 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, */ extern int zfs_metaslab_sm_blksz_with_log; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, CTLFLAG_RDTUN, &zfs_metaslab_sm_blksz_with_log, 0, "Block size for space map in pools with log space map enabled. " "Power of 2 greater than 4096."); -/* END CSTYLED */ /* * The in-core space map representation is more compact than its on-disk form. 
@@ -556,29 +508,23 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, */ extern uint_t zfs_condense_pct; -/* BEGIN CSTYLED */ SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct, CTLFLAG_RWTUN, &zfs_condense_pct, 0, "Condense on-disk spacemap when it is more than this many percents" " of in-memory counterpart"); -/* END CSTYLED */ extern uint_t zfs_remove_max_segment; -/* BEGIN CSTYLED */ SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment, CTLFLAG_RWTUN, &zfs_remove_max_segment, 0, "Largest contiguous segment ZFS will attempt to allocate when removing" " a device"); -/* END CSTYLED */ extern int zfs_removal_suspend_progress; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0, "Ensures certain actions can happen while in the middle of a removal"); -/* END CSTYLED */ /* * Minimum size which forces the dynamic allocator to change @@ -588,12 +534,10 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, */ extern uint64_t metaslab_df_alloc_threshold; -/* BEGIN CSTYLED */ SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, CTLFLAG_RWTUN, &metaslab_df_alloc_threshold, 0, "Minimum size which forces the dynamic allocator to change its" " allocation strategy"); -/* END CSTYLED */ /* * The minimum free space, in percent, which must be available @@ -603,12 +547,10 @@ SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, */ extern uint_t metaslab_df_free_pct; -/* BEGIN CSTYLED */ SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct, CTLFLAG_RWTUN, &metaslab_df_free_pct, 0, "The minimum free space, in percent, which must be available in a" " space map to continue allocations in a first-fit fashion"); -/* END CSTYLED */ /* mmp.c */ @@ -631,28 +573,22 @@ param_set_multihost_interval(SYSCTL_HANDLER_ARGS) extern int zfs_ccw_retry_interval; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN, &zfs_ccw_retry_interval, 0, "Configuration cache file write, retry after failure, interval" " (seconds)"); -/* END CSTYLED */ extern uint64_t zfs_max_missing_tvds_cachefile; -/* BEGIN CSTYLED */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile, CTLFLAG_RWTUN, &zfs_max_missing_tvds_cachefile, 0, "Allow importing pools with missing top-level vdevs in cache file"); -/* END CSTYLED */ extern uint64_t zfs_max_missing_tvds_scan; -/* BEGIN CSTYLED */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan, CTLFLAG_RWTUN, &zfs_max_missing_tvds_scan, 0, "Allow importing pools with missing top-level vdevs during scan"); -/* END CSTYLED */ /* spa_misc.c */ @@ -681,11 +617,9 @@ sysctl_vfs_zfs_debug_flags(SYSCTL_HANDLER_ARGS) return (0); } -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, debugflags, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, NULL, 0, sysctl_vfs_zfs_debug_flags, "IU", "Debug flags for ZFS testing."); -/* END CSTYLED */ int param_set_deadman_synctime(SYSCTL_HANDLER_ARGS) @@ -768,10 +702,8 @@ param_set_slop_shift(SYSCTL_HANDLER_ARGS) extern int space_map_ibs; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN, &space_map_ibs, 0, "Space map indirect block shift"); -/* END CSTYLED */ /* vdev.c */ @@ -795,13 +727,11 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS) return (0); } -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift, CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift), param_set_min_auto_ashift, "IU", "Min ashift used when creating new top-level vdev. 
(LEGACY)"); -/* END CSTYLED */ int param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) @@ -822,14 +752,12 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) return (0); } -/* BEGIN CSTYLED */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift, CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift), param_set_max_auto_ashift, "IU", "Max ashift used when optimizing for logical -> physical sector size on" " new top-level vdevs. (LEGACY)"); -/* END CSTYLED */ /* * Since the DTL space map of a vdev is not expected to have a lot of @@ -837,11 +765,9 @@ SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift, */ extern int zfs_vdev_dtl_sm_blksz; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, CTLFLAG_RDTUN, &zfs_vdev_dtl_sm_blksz, 0, "Block size for DTL space map. Power of 2 greater than 4096."); -/* END CSTYLED */ /* * vdev-wide space maps that have lots of entries written to them at @@ -850,19 +776,15 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, */ extern int zfs_vdev_standard_sm_blksz; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN, &zfs_vdev_standard_sm_blksz, 0, "Block size for standard space map. Power of 2 greater than 4096."); -/* END CSTYLED */ extern int vdev_validate_skip; -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, CTLFLAG_RDTUN, &vdev_validate_skip, 0, "Enable to bypass vdev_validate()."); -/* END CSTYLED */ /* vdev_mirror.c */ @@ -870,17 +792,13 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, extern uint_t zfs_vdev_max_active; -/* BEGIN CSTYLED */ SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN, &zfs_vdev_max_active, 0, "The maximum number of I/Os of all types active for each device." " (LEGACY)"); -/* END CSTYLED */ /* zio.c */ -/* BEGIN CSTYLED */ SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, CTLFLAG_RDTUN, &zio_exclude_metadata, 0, "Exclude metadata buffers from dumps as well"); -/* END CSTYLED */ diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index e5acd684ee6b..b8c2c341dace 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -291,8 +291,12 @@ zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred, case F_SEEK_HOLE: { off = *(offset_t *)data; + error = vn_lock(vp, LK_SHARED); + if (error) + return (error); /* offset parameter is in/out */ error = zfs_holey(VTOZ(vp), com, &off); + VOP_UNLOCK(vp); if (error) return (error); *(offset_t *)data = off; @@ -3956,10 +3960,8 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, * to the page fault handler's OOM logic, but this is * the best we can do for now. 
*/ - for (int i = 0; i < count; i++) { - ASSERT(vm_page_none_valid(ma[i])); + for (int i = 0; i < count; i++) vm_page_xunbusy(ma[i]); - } lr = zfs_rangelock_enter(&zp->z_rangelock, rounddown(start, blksz), len, RL_READER); @@ -6198,7 +6200,7 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap) } else { #if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \ __FreeBSD_version >= 1400086 - vn_lock_pair(invp, false, LK_EXCLUSIVE, outvp, false, + vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE); #else vn_lock_pair(invp, false, outvp, false); diff --git a/module/os/freebsd/zfs/zfs_znode_os.c b/module/os/freebsd/zfs/zfs_znode_os.c index a31ecc367414..31ca07a86dda 100644 --- a/module/os/freebsd/zfs/zfs_znode_os.c +++ b/module/os/freebsd/zfs/zfs_znode_os.c @@ -370,8 +370,6 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, */ if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs) ZTOV(zp)->v_flag |= VROOT; - - vn_exists(ZTOV(zp)); } void diff --git a/module/os/freebsd/zfs/zio_crypt.c b/module/os/freebsd/zfs/zio_crypt.c index feaca93fb933..195ac58f6f1a 100644 --- a/module/os/freebsd/zfs/zio_crypt.c +++ b/module/os/freebsd/zfs/zio_crypt.c @@ -1823,7 +1823,6 @@ zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, } #if defined(_KERNEL) && defined(HAVE_SPL) -/* CSTYLED */ module_param(zfs_key_max_salt_uses, ulong, 0644); MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value " "can be used for generating encryption keys before it is rotated"); diff --git a/module/os/linux/spl/spl-err.c b/module/os/linux/spl/spl-err.c index 29781b9515b2..81e520547dd7 100644 --- a/module/os/linux/spl/spl-err.c +++ b/module/os/linux/spl/spl-err.c @@ -33,7 +33,6 @@ * But we would still default to the current default of not to do that. */ static unsigned int spl_panic_halt; -/* CSTYLED */ module_param(spl_panic_halt, uint, 0644); MODULE_PARM_DESC(spl_panic_halt, "Cause kernel panic on assertion failures"); diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c index 6a95d77ac278..e13914221a6a 100644 --- a/module/os/linux/spl/spl-generic.c +++ b/module/os/linux/spl/spl-generic.c @@ -54,7 +54,6 @@ unsigned long spl_hostid = 0; EXPORT_SYMBOL(spl_hostid); -/* CSTYLED */ module_param(spl_hostid, ulong, 0644); MODULE_PARM_DESC(spl_hostid, "The system hostid."); diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c index 7e806bd5699c..33c7d0879741 100644 --- a/module/os/linux/spl/spl-kmem-cache.c +++ b/module/os/linux/spl/spl-kmem-cache.c @@ -48,7 +48,6 @@ #define smp_mb__after_atomic(x) smp_mb__after_clear_bit(x) #endif -/* BEGIN CSTYLED */ /* * Cache magazines are an optimization designed to minimize the cost of * allocating memory. They do this by keeping a per-cpu cache of recently @@ -97,7 +96,6 @@ static unsigned int spl_kmem_cache_kmem_threads = 4; module_param(spl_kmem_cache_kmem_threads, uint, 0444); MODULE_PARM_DESC(spl_kmem_cache_kmem_threads, "Number of spl_kmem_cache threads"); -/* END CSTYLED */ /* * Slab allocation interfaces diff --git a/module/os/linux/spl/spl-kmem.c b/module/os/linux/spl/spl-kmem.c index cae304d33bc3..3e8361184d57 100644 --- a/module/os/linux/spl/spl-kmem.c +++ b/module/os/linux/spl/spl-kmem.c @@ -26,7 +26,6 @@ #include #include -/* BEGIN CSTYLED */ /* * As a general rule kmem_alloc() allocations should be small, preferably * just a few pages since they must by physically contiguous. 
Therefore, a @@ -62,7 +61,6 @@ module_param(spl_kmem_alloc_max, uint, 0644); MODULE_PARM_DESC(spl_kmem_alloc_max, "Maximum size in bytes for a kmem_alloc()"); EXPORT_SYMBOL(spl_kmem_alloc_max); -/* END CSTYLED */ int kmem_debugging(void) diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 7f4cab5da114..77dd472ea8b1 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -117,9 +117,7 @@ module_param(spl_taskq_thread_bind, int, 0644); MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default"); static uint_t spl_taskq_thread_timeout_ms = 5000; -/* BEGIN CSTYLED */ module_param(spl_taskq_thread_timeout_ms, uint, 0644); -/* END CSTYLED */ MODULE_PARM_DESC(spl_taskq_thread_timeout_ms, "Minimum idle threads exit interval for dynamic taskqs"); @@ -133,9 +131,7 @@ MODULE_PARM_DESC(spl_taskq_thread_priority, "Allow non-default priority for taskq threads"); static uint_t spl_taskq_thread_sequential = 4; -/* BEGIN CSTYLED */ module_param(spl_taskq_thread_sequential, uint, 0644); -/* END CSTYLED */ MODULE_PARM_DESC(spl_taskq_thread_sequential, "Create new taskq threads after N sequential tasks"); diff --git a/module/os/linux/zfs/abd_os.c b/module/os/linux/zfs/abd_os.c index 04ab8bbca352..39ea3e62dba0 100644 --- a/module/os/linux/zfs/abd_os.c +++ b/module/os/linux/zfs/abd_os.c @@ -1346,7 +1346,6 @@ MODULE_PARM_DESC(zfs_abd_scatter_enabled, module_param(zfs_abd_scatter_min_size, int, 0644); MODULE_PARM_DESC(zfs_abd_scatter_min_size, "Minimum size of scatter allocations."); -/* CSTYLED */ module_param(zfs_abd_scatter_max_order, uint, 0644); MODULE_PARM_DESC(zfs_abd_scatter_max_order, "Maximum order allocation used for a scatter ABD."); diff --git a/module/os/linux/zfs/zfs_debug.c b/module/os/linux/zfs/zfs_debug.c index a017900d5538..7d01f8f373b2 100644 --- a/module/os/linux/zfs/zfs_debug.c +++ b/module/os/linux/zfs/zfs_debug.c @@ -214,7 +214,5 @@ __dprintf(boolean_t dprint, const char *file, const char *func, module_param(zfs_dbgmsg_enable, int, 0644); MODULE_PARM_DESC(zfs_dbgmsg_enable, "Enable ZFS debug message log"); -/* BEGIN CSTYLED */ module_param(zfs_dbgmsg_maxsize, uint, 0644); -/* END CSTYLED */ MODULE_PARM_DESC(zfs_dbgmsg_maxsize, "Maximum ZFS debug log size"); diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c index 0146d842339a..f08415fdb2e3 100644 --- a/module/os/linux/zfs/zfs_uio.c +++ b/module/os/linux/zfs/zfs_uio.c @@ -546,8 +546,9 @@ zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio) unlock_page(p); put_page(p); - p = __page_cache_alloc(gfp_zero_page); - zfs_mark_page(p); + uio->uio_dio.pages[i] = + __page_cache_alloc(gfp_zero_page); + zfs_mark_page(uio->uio_dio.pages[i]); } else { unlock_page(p); } diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index dd9fd760b9c2..a882c88a7a72 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -4345,7 +4345,6 @@ EXPORT_SYMBOL(zfs_putpage); EXPORT_SYMBOL(zfs_dirty_inode); EXPORT_SYMBOL(zfs_map); -/* CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); #endif diff --git a/module/os/linux/zfs/zfs_znode_os.c b/module/os/linux/zfs/zfs_znode_os.c index bbaca2f58394..aff7b1f4dac1 100644 --- a/module/os/linux/zfs/zfs_znode_os.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -1967,7 +1967,6 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) EXPORT_SYMBOL(zfs_create_fs); 
EXPORT_SYMBOL(zfs_obj_to_path); -/* CSTYLED */ module_param(zfs_object_mutex_size, uint, 0644); MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array"); module_param(zfs_unlink_suspend_progress, int, 0644); diff --git a/module/os/linux/zfs/zio_crypt.c b/module/os/linux/zfs/zio_crypt.c index 21f3740f6fe6..22eeef7f0743 100644 --- a/module/os/linux/zfs/zio_crypt.c +++ b/module/os/linux/zfs/zio_crypt.c @@ -2073,7 +2073,6 @@ zio_do_crypt_abd(boolean_t encrypt, zio_crypt_key_t *key, dmu_object_type_t ot, } #if defined(_KERNEL) -/* CSTYLED */ module_param(zfs_key_max_salt_uses, ulong, 0644); MODULE_PARM_DESC(zfs_key_max_salt_uses, "Max number of times a salt value " "can be used for generating encryption keys before it is rotated"); diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index f6e014327717..ff1370c543dc 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -1143,7 +1143,6 @@ const struct file_operations zpl_dir_file_operations = { #endif }; -/* CSTYLED */ module_param(zfs_fallocate_reserve_percent, uint, 0644); MODULE_PARM_DESC(zfs_fallocate_reserve_percent, "Percentage of length to use for the available capacity check"); diff --git a/module/os/linux/zfs/zpl_super.c b/module/os/linux/zfs/zpl_super.c index 287f5f36f9dd..b97b701b7460 100644 --- a/module/os/linux/zfs/zpl_super.c +++ b/module/os/linux/zfs/zpl_super.c @@ -375,7 +375,18 @@ zpl_prune_sb(uint64_t nr_to_scan, void *arg) struct super_block *sb = (struct super_block *)arg; int objects = 0; - (void) -zfs_prune(sb, nr_to_scan, &objects); + /* + * deactivate_locked_super calls shrinker_free and only then + * sops->kill_sb cb, resulting in UAF on umount when trying to reach + * for the shrinker functions in zpl_prune_sb of in-umount dataset. + * Increment if s_active is not zero, but don't prune if it is - + * umount could be underway. + */ + if (atomic_inc_not_zero(&sb->s_active)) { + (void) -zfs_prune(sb, nr_to_scan, &objects); + atomic_dec(&sb->s_active); + } + } const struct super_operations zpl_super_operations = { diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 47aa6417068d..7c9aae6a66af 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -1899,7 +1899,6 @@ zvol_fini(void) ida_destroy(&zvol_ida); } -/* BEGIN CSTYLED */ module_param(zvol_inhibit_dev, uint, 0644); MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes"); @@ -1908,7 +1907,7 @@ MODULE_PARM_DESC(zvol_major, "Major number for zvol device"); module_param(zvol_threads, uint, 0444); MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. 
Set" - "to 0 to use all active CPUs"); + "to 0 to use all active CPUs"); module_param(zvol_request_sync, uint, 0644); MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests"); @@ -1933,11 +1932,9 @@ MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols"); module_param(zvol_blk_mq_blocks_per_thread, uint, 0644); MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread, - "Process volblocksize blocks per thread"); + "Process volblocksize blocks per thread"); #ifndef HAVE_BLKDEV_GET_ERESTARTSYS module_param(zvol_open_timeout_ms, uint, 0644); MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries"); #endif - -/* END CSTYLED */ diff --git a/module/zcommon/zfs_valstr.c b/module/zcommon/zfs_valstr.c index 43bccea14a85..fde8ae28ef36 100644 --- a/module/zcommon/zfs_valstr.c +++ b/module/zcommon/zfs_valstr.c @@ -185,7 +185,6 @@ zfs_valstr_ ## name(int v, char *out, size_t outlen) \ /* String tables */ /* ZIO flags: zio_flag_t, typically zio->io_flags */ -/* BEGIN CSTYLED */ _VALSTR_BITFIELD_IMPL(zio_flag, { '.', "DA", "DONT_AGGREGATE" }, { '.', "RP", "IO_REPAIR" }, @@ -221,13 +220,11 @@ _VALSTR_BITFIELD_IMPL(zio_flag, { '.', "DG", "DELEGATED" }, { '.', "DC", "DIO_CHKSUM_ERR" }, ) -/* END CSTYLED */ /* * ZIO pipeline stage(s): enum zio_stage, typically zio->io_stage or * zio->io_pipeline. */ -/* BEGIN CSTYLED */ _VALSTR_BITFIELD_IMPL(zio_stage, { 'O', "O ", "OPEN" }, { 'I', "RI", "READ_BP_INIT" }, @@ -257,10 +254,8 @@ _VALSTR_BITFIELD_IMPL(zio_stage, { 'C', "DC", "DIO_CHECKSUM_VERIFY" }, { 'X', "X ", "DONE" }, ) -/* END CSTYLED */ /* ZIO priority: zio_priority_t, typically zio->io_priority */ -/* BEGIN CSTYLED */ _VALSTR_ENUM_IMPL(zio_priority, "SYNC_READ", "SYNC_WRITE", @@ -274,7 +269,6 @@ _VALSTR_ENUM_IMPL(zio_priority, "[NUM_QUEUEABLE]", "NOW", ) -/* END CSTYLED */ #undef _VALSTR_BITFIELD_IMPL #undef _VALSTR_ENUM_IMPL diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index d3355730ba3d..a709679b9032 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -128,6 +128,9 @@ zpool_prop_init(void) zprop_register_number(ZPOOL_PROP_DEDUP_TABLE_SIZE, "dedup_table_size", 0, PROP_READONLY, ZFS_TYPE_POOL, "", "DDTSIZE", B_FALSE, sfeatures); + zprop_register_number(ZPOOL_PROP_LAST_SCRUBBED_TXG, + "last_scrubbed_txg", 0, PROP_READONLY, ZFS_TYPE_POOL, "", + "LAST_SCRUBBED_TXG", B_FALSE, sfeatures); /* default number properties */ zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 0f48fa992cde..d1102a9d4359 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -26,7 +26,7 @@ * Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2019, loli10K . All rights reserved. * Copyright (c) 2020, George Amanakis. All rights reserved. - * Copyright (c) 2019, 2023, Klara Inc. + * Copyright (c) 2019, 2024, Klara Inc. * Copyright (c) 2019, Allan Jude * Copyright (c) 2020, The FreeBSD Foundation [1] * Copyright (c) 2021, 2024 by George Melikov. All rights reserved. @@ -465,6 +465,9 @@ static uint_t zfs_arc_lotsfree_percent = 10; */ static int zfs_arc_prune_task_threads = 1; +/* Used by spa_export/spa_destroy to flush the arc asynchronously */ +static taskq_t *arc_flush_taskq; + /* * Controls the number of ARC eviction threads. * Possible values: @@ -791,6 +794,23 @@ static buf_hash_table_t buf_hash_table; uint64_t zfs_crc64_table[256]; +/* + * Asynchronous ARC flush + * + * We track these in a list for arc_async_flush_guid_inuse(). 
+ * Used for both L1 and L2 async teardown. + */ +static list_t arc_async_flush_list; +static kmutex_t arc_async_flush_lock; + +typedef struct arc_async_flush { + uint64_t af_spa_guid; + taskq_ent_t af_tqent; + uint_t af_cache_level; /* 1 or 2 to differentiate node */ + list_node_t af_node; +} arc_async_flush_t; + + /* * Level 2 ARC */ @@ -1726,13 +1746,15 @@ arc_buf_try_copy_decompressed_data(arc_buf_t *buf) */ static arc_buf_hdr_t * arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev, - dva_t dva, uint64_t daddr, int32_t psize, uint64_t birth, + dva_t dva, uint64_t daddr, int32_t psize, uint64_t asize, uint64_t birth, enum zio_compress compress, uint8_t complevel, boolean_t protected, boolean_t prefetch, arc_state_type_t arcs_state) { arc_buf_hdr_t *hdr; ASSERT(size != 0); + ASSERT(dev->l2ad_vdev != NULL); + hdr = kmem_cache_alloc(hdr_l2only_cache, KM_SLEEP); hdr->b_birth = birth; hdr->b_type = type; @@ -1740,6 +1762,7 @@ arc_buf_alloc_l2only(size_t size, arc_buf_contents_t type, l2arc_dev_t *dev, arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L2HDR); HDR_SET_LSIZE(hdr, size); HDR_SET_PSIZE(hdr, psize); + HDR_SET_L2SIZE(hdr, asize); arc_hdr_set_compress(hdr, compress); hdr->b_complevel = complevel; if (protected) @@ -3527,16 +3550,17 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr, boolean_t state_only) { - l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr; - l2arc_dev_t *dev = l2hdr->b_dev; uint64_t lsize = HDR_GET_LSIZE(hdr); uint64_t psize = HDR_GET_PSIZE(hdr); - uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize); + uint64_t asize = HDR_GET_L2SIZE(hdr); arc_buf_contents_t type = hdr->b_type; int64_t lsize_s; int64_t psize_s; int64_t asize_s; + /* For L2 we expect the header's b_l2size to be valid */ + ASSERT3U(asize, >=, psize); + if (incr) { lsize_s = lsize; psize_s = psize; @@ -3598,8 +3622,6 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) { l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr; l2arc_dev_t *dev = l2hdr->b_dev; - uint64_t psize = HDR_GET_PSIZE(hdr); - uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, psize); ASSERT(MUTEX_HELD(&dev->l2ad_mtx)); ASSERT(HDR_HAS_L2HDR(hdr)); @@ -3607,7 +3629,10 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) list_remove(&dev->l2ad_buflist, hdr); l2arc_hdr_arcstats_decrement(hdr); - vdev_space_update(dev->l2ad_vdev, -asize, 0, 0); + if (dev->l2ad_vdev != NULL) { + uint64_t asize = HDR_GET_L2SIZE(hdr); + vdev_space_update(dev->l2ad_vdev, -asize, 0, 0); + } (void) zfs_refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr); @@ -4482,20 +4507,10 @@ arc_evict(void) return (total_evicted); } -void -arc_flush(spa_t *spa, boolean_t retry) +static void +arc_flush_impl(uint64_t guid, boolean_t retry) { - uint64_t guid = 0; - - /* - * If retry is B_TRUE, a spa must not be specified since we have - * no good way to determine if all of a spa's buffers have been - * evicted from an arc state. - */ - ASSERT(!retry || spa == NULL); - - if (spa != NULL) - guid = spa_load_guid(spa); + ASSERT(!retry || guid == 0); (void) arc_flush_state(arc_mru, guid, ARC_BUFC_DATA, retry); (void) arc_flush_state(arc_mru, guid, ARC_BUFC_METADATA, retry); @@ -4513,6 +4528,106 @@ arc_flush(spa_t *spa, boolean_t retry) (void) arc_flush_state(arc_uncached, guid, ARC_BUFC_METADATA, retry); } +void +arc_flush(spa_t *spa, boolean_t retry) +{ + /* + * If retry is B_TRUE, a spa must not be specified since we have + * no good way to determine if all of a spa's buffers have been + * evicted from an arc state. 
+ */ + ASSERT(!retry || spa == NULL); + + arc_flush_impl(spa != NULL ? spa_load_guid(spa) : 0, retry); +} + +static arc_async_flush_t * +arc_async_flush_add(uint64_t spa_guid, uint_t level) +{ + arc_async_flush_t *af = kmem_alloc(sizeof (*af), KM_SLEEP); + af->af_spa_guid = spa_guid; + af->af_cache_level = level; + taskq_init_ent(&af->af_tqent); + list_link_init(&af->af_node); + + mutex_enter(&arc_async_flush_lock); + list_insert_tail(&arc_async_flush_list, af); + mutex_exit(&arc_async_flush_lock); + + return (af); +} + +static void +arc_async_flush_remove(uint64_t spa_guid, uint_t level) +{ + mutex_enter(&arc_async_flush_lock); + for (arc_async_flush_t *af = list_head(&arc_async_flush_list); + af != NULL; af = list_next(&arc_async_flush_list, af)) { + if (af->af_spa_guid == spa_guid && + af->af_cache_level == level) { + list_remove(&arc_async_flush_list, af); + kmem_free(af, sizeof (*af)); + break; + } + } + mutex_exit(&arc_async_flush_lock); +} + +static void +arc_flush_task(void *arg) +{ + arc_async_flush_t *af = arg; + hrtime_t start_time = gethrtime(); + uint64_t spa_guid = af->af_spa_guid; + + arc_flush_impl(spa_guid, B_FALSE); + arc_async_flush_remove(spa_guid, af->af_cache_level); + + uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time); + if (elapsed > 0) { + zfs_dbgmsg("spa %llu arc flushed in %llu ms", + (u_longlong_t)spa_guid, (u_longlong_t)elapsed); + } +} + +/* + * ARC buffers use the spa's load guid and can continue to exist after + * the spa_t is gone (exported). The blocks are orphaned since each + * spa import has a different load guid. + * + * It's OK if the spa is re-imported while this asynchronous flush is + * still in progress. The new spa_load_guid will be different. + * + * Also, arc_fini will wait for any arc_flush_task to finish. 
+ */ +void +arc_flush_async(spa_t *spa) +{ + uint64_t spa_guid = spa_load_guid(spa); + arc_async_flush_t *af = arc_async_flush_add(spa_guid, 1); + + taskq_dispatch_ent(arc_flush_taskq, arc_flush_task, + af, TQ_SLEEP, &af->af_tqent); +} + +/* + * Check if a guid is still in-use as part of an async teardown task + */ +boolean_t +arc_async_flush_guid_inuse(uint64_t spa_guid) +{ + mutex_enter(&arc_async_flush_lock); + for (arc_async_flush_t *af = list_head(&arc_async_flush_list); + af != NULL; af = list_next(&arc_async_flush_list, af)) { + if (af->af_spa_guid == spa_guid) { + mutex_exit(&arc_async_flush_lock); + return (B_TRUE); + } + } + mutex_exit(&arc_async_flush_lock); + return (B_FALSE); +} + uint64_t arc_reduce_target_size(uint64_t to_free) { @@ -7873,6 +7988,12 @@ arc_init(void) } } + list_create(&arc_async_flush_list, sizeof (arc_async_flush_t), + offsetof(arc_async_flush_t, af_node)); + mutex_init(&arc_async_flush_lock, NULL, MUTEX_DEFAULT, NULL); + arc_flush_taskq = taskq_create("arc_flush", MIN(boot_ncpus, 4), + defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC); + arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED, sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); @@ -7938,6 +8059,10 @@ arc_fini(void) arc_lowmem_fini(); #endif /* _KERNEL */ + /* Wait for any background flushes */ + taskq_wait(arc_flush_taskq); + taskq_destroy(arc_flush_taskq); + /* Use B_TRUE to ensure *all* buffers are evicted */ arc_flush(NULL, B_TRUE); @@ -7954,6 +8079,9 @@ arc_fini(void) taskq_wait(arc_prune_taskq); taskq_destroy(arc_prune_taskq); + list_destroy(&arc_async_flush_list); + mutex_destroy(&arc_async_flush_lock); + mutex_enter(&arc_prune_mtx); while ((p = list_remove_head(&arc_prune_list)) != NULL) { (void) zfs_refcount_remove(&p->p_refcnt, &arc_prune_list); @@ -8325,6 +8453,18 @@ l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote) return (next); } +static boolean_t +l2arc_dev_invalid(const l2arc_dev_t *dev) +{ + /* + * We want to skip devices that are being rebuilt, trimmed, + * removed, or belong to a spa that is being exported. + */ + return (dev->l2ad_vdev == NULL || vdev_is_dead(dev->l2ad_vdev) || + dev->l2ad_rebuild || dev->l2ad_trim_all || + dev->l2ad_spa == NULL || dev->l2ad_spa->spa_is_exporting); +} + /* * Cycle through L2ARC devices. This is how L2ARC load balances. * If a device is returned, this also returns holding the spa config lock. 
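/*
 * Illustrative sketch, not part of the patch: the async flush list above
 * is what keeps an exported pool's load guid reserved. A fresh load guid
 * is accepted only once no in-flight flush still uses it; simplified from
 * spa_generate_load_guid() in the spa_misc.c hunk below (the real code
 * also walks the spa namespace under spa_namespace_lock).
 */
static uint64_t
example_generate_load_guid(void)
{
	uint64_t guid;

	do {
		(void) random_get_pseudo_bytes((void *)&guid, sizeof (guid));
	} while (guid == 0 || arc_async_flush_guid_inuse(guid));

	return (guid);
}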
@@ -8365,12 +8505,10 @@ l2arc_dev_get_next(void) break; ASSERT3P(next, !=, NULL); - } while (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild || - next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting); + } while (l2arc_dev_invalid(next)); /* if we were unable to find any usable vdevs, return NULL */ - if (vdev_is_dead(next->l2ad_vdev) || next->l2ad_rebuild || - next->l2ad_trim_all || next->l2ad_spa->spa_is_exporting) + if (l2arc_dev_invalid(next)) next = NULL; l2arc_dev_last = next; @@ -8500,6 +8638,8 @@ l2arc_write_done(zio_t *zio) uint64_t psize = HDR_GET_PSIZE(hdr); l2arc_hdr_arcstats_decrement(hdr); + ASSERT(dev->l2ad_vdev != NULL); + bytes_dropped += vdev_psize_to_asize(dev->l2ad_vdev, psize); (void) zfs_refcount_remove_many(&dev->l2ad_alloc, @@ -8881,6 +9021,8 @@ l2arc_log_blk_overhead(uint64_t write_sz, l2arc_dev_t *dev) if (dev->l2ad_log_entries == 0) { return (0); } else { + ASSERT(dev->l2ad_vdev != NULL); + uint64_t log_entries = write_sz >> SPA_MINBLOCKSHIFT; uint64_t log_blocks = (log_entries + @@ -8909,6 +9051,9 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) vdev_t *vd = dev->l2ad_vdev; boolean_t rerun; + ASSERT(vd != NULL || all); + ASSERT(dev->l2ad_spa != NULL || all); + buflist = &dev->l2ad_buflist; top: @@ -9001,7 +9146,8 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) if (!all && l2arc_log_blkptr_valid(dev, lb_ptr_buf->lb_ptr)) { break; } else { - vdev_space_update(vd, -asize, 0, 0); + if (vd != NULL) + vdev_space_update(vd, -asize, 0, 0); ARCSTAT_INCR(arcstat_l2_log_blk_asize, -asize); ARCSTAT_BUMPDOWN(arcstat_l2_log_blk_count); zfs_refcount_remove_many(&dev->l2ad_lb_asize, asize, @@ -9415,6 +9561,8 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) hdr->b_l2hdr.b_hits = 0; hdr->b_l2hdr.b_arcs_state = hdr->b_l1hdr.b_state->arcs_state; + /* l2arc_hdr_arcstats_update() expects a valid asize */ + HDR_SET_L2SIZE(hdr, asize); arc_hdr_set_flags(hdr, ARC_FLAG_HAS_L2HDR | ARC_FLAG_L2_WRITING); @@ -9667,6 +9815,12 @@ l2arc_rebuild_dev(l2arc_dev_t *dev, boolean_t reopen) uint64_t l2dhdr_asize = dev->l2ad_dev_hdr_asize; spa_t *spa = dev->l2ad_spa; + /* + * After a l2arc_remove_vdev(), the spa_t will no longer be valid + */ + if (spa == NULL) + return; + /* * The L2ARC has to hold at least the payload of one log block for * them to be restored (persistent L2ARC). The payload of a log block @@ -9834,53 +9988,110 @@ l2arc_rebuild_vdev(vdev_t *vd, boolean_t reopen) l2arc_rebuild_dev(dev, reopen); } +typedef struct { + l2arc_dev_t *rva_l2arc_dev; + uint64_t rva_spa_gid; + uint64_t rva_vdev_gid; + boolean_t rva_async; + +} remove_vdev_args_t; + +static void +l2arc_device_teardown(void *arg) +{ + remove_vdev_args_t *rva = arg; + l2arc_dev_t *remdev = rva->rva_l2arc_dev; + hrtime_t start_time = gethrtime(); + + /* + * Clear all buflists and ARC references. L2ARC device flush. 
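+ * Runs either inline from l2arc_remove_vdev() or, when the pool is being + * exported or destroyed, from the arc_flush taskq.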
+ */ + l2arc_evict(remdev, 0, B_TRUE); + list_destroy(&remdev->l2ad_buflist); + ASSERT(list_is_empty(&remdev->l2ad_lbptr_list)); + list_destroy(&remdev->l2ad_lbptr_list); + mutex_destroy(&remdev->l2ad_mtx); + zfs_refcount_destroy(&remdev->l2ad_alloc); + zfs_refcount_destroy(&remdev->l2ad_lb_asize); + zfs_refcount_destroy(&remdev->l2ad_lb_count); + kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize); + vmem_free(remdev, sizeof (l2arc_dev_t)); + + uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time); + if (elapsed > 0) { + zfs_dbgmsg("spa %llu, vdev %llu removed in %llu ms", + (u_longlong_t)rva->rva_spa_gid, + (u_longlong_t)rva->rva_vdev_gid, + (u_longlong_t)elapsed); + } + + if (rva->rva_async) + arc_async_flush_remove(rva->rva_spa_gid, 2); + kmem_free(rva, sizeof (remove_vdev_args_t)); +} + /* * Remove a vdev from the L2ARC. */ void l2arc_remove_vdev(vdev_t *vd) { - l2arc_dev_t *remdev = NULL; + spa_t *spa = vd->vdev_spa; + boolean_t asynchronous = spa->spa_state == POOL_STATE_EXPORTED || + spa->spa_state == POOL_STATE_DESTROYED; /* * Find the device by vdev */ - remdev = l2arc_vdev_get(vd); + l2arc_dev_t *remdev = l2arc_vdev_get(vd); ASSERT3P(remdev, !=, NULL); + /* + * Save info for final teardown + */ + remove_vdev_args_t *rva = kmem_alloc(sizeof (remove_vdev_args_t), + KM_SLEEP); + rva->rva_l2arc_dev = remdev; + rva->rva_spa_gid = spa_load_guid(spa); + rva->rva_vdev_gid = remdev->l2ad_vdev->vdev_guid; + /* * Cancel any ongoing or scheduled rebuild. */ mutex_enter(&l2arc_rebuild_thr_lock); + remdev->l2ad_rebuild_cancel = B_TRUE; if (remdev->l2ad_rebuild_began == B_TRUE) { - remdev->l2ad_rebuild_cancel = B_TRUE; while (remdev->l2ad_rebuild == B_TRUE) cv_wait(&l2arc_rebuild_thr_cv, &l2arc_rebuild_thr_lock); } mutex_exit(&l2arc_rebuild_thr_lock); + rva->rva_async = asynchronous; /* * Remove device from global list */ + ASSERT(spa_config_held(spa, SCL_L2ARC, RW_WRITER) & SCL_L2ARC); mutex_enter(&l2arc_dev_mtx); list_remove(l2arc_dev_list, remdev); l2arc_dev_last = NULL; /* may have been invalidated */ atomic_dec_64(&l2arc_ndev); + + /* During a pool export spa & vdev will no longer be valid */ + if (asynchronous) { + remdev->l2ad_spa = NULL; + remdev->l2ad_vdev = NULL; + } mutex_exit(&l2arc_dev_mtx); - /* - * Clear all buflists and ARC references. L2ARC device flush. - */ - l2arc_evict(remdev, 0, B_TRUE); - list_destroy(&remdev->l2ad_buflist); - ASSERT(list_is_empty(&remdev->l2ad_lbptr_list)); - list_destroy(&remdev->l2ad_lbptr_list); - mutex_destroy(&remdev->l2ad_mtx); - zfs_refcount_destroy(&remdev->l2ad_alloc); - zfs_refcount_destroy(&remdev->l2ad_lb_asize); - zfs_refcount_destroy(&remdev->l2ad_lb_count); - kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize); - vmem_free(remdev, sizeof (l2arc_dev_t)); + if (!asynchronous) { + l2arc_device_teardown(rva); + return; + } + + arc_async_flush_t *af = arc_async_flush_add(rva->rva_spa_gid, 2); + + taskq_dispatch_ent(arc_flush_taskq, l2arc_device_teardown, rva, + TQ_SLEEP, &af->af_tqent); } void @@ -10206,7 +10417,15 @@ l2arc_rebuild(l2arc_dev_t *dev) vmem_free(this_lb, sizeof (*this_lb)); vmem_free(next_lb, sizeof (*next_lb)); - if (!l2arc_rebuild_enabled) { + if (err == ECANCELED) { + /* + * In case the rebuild was canceled do not log to spa history + * log as the pool may be in the process of being removed. 
+ */ + zfs_dbgmsg("L2ARC rebuild aborted, restored %llu blocks", + (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count)); + return (err); + } else if (!l2arc_rebuild_enabled) { spa_history_log_internal(spa, "L2ARC rebuild", NULL, "disabled"); } else if (err == 0 && zfs_refcount_count(&dev->l2ad_lb_count) > 0) { @@ -10224,13 +10443,6 @@ l2arc_rebuild(l2arc_dev_t *dev) "no valid log blocks"); memset(l2dhdr, 0, dev->l2ad_dev_hdr_asize); l2arc_dev_hdr_update(dev); - } else if (err == ECANCELED) { - /* - * In case the rebuild was canceled do not log to spa history - * log as the pool may be in the process of being removed. - */ - zfs_dbgmsg("L2ARC rebuild aborted, restored %llu blocks", - (u_longlong_t)zfs_refcount_count(&dev->l2ad_lb_count)); } else if (err != 0) { spa_history_log_internal(spa, "L2ARC rebuild", NULL, "aborted, restored %llu blocks", @@ -10502,7 +10714,8 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev) arc_buf_hdr_t *hdr, *exists; kmutex_t *hash_lock; arc_buf_contents_t type = L2BLK_GET_TYPE((le)->le_prop); - uint64_t asize; + uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev, + L2BLK_GET_PSIZE((le)->le_prop)); /* * Do all the allocation before grabbing any locks, this lets us @@ -10511,13 +10724,11 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev) */ hdr = arc_buf_alloc_l2only(L2BLK_GET_LSIZE((le)->le_prop), type, dev, le->le_dva, le->le_daddr, - L2BLK_GET_PSIZE((le)->le_prop), le->le_birth, + L2BLK_GET_PSIZE((le)->le_prop), asize, le->le_birth, L2BLK_GET_COMPRESS((le)->le_prop), le->le_complevel, L2BLK_GET_PROTECTED((le)->le_prop), L2BLK_GET_PREFETCH((le)->le_prop), L2BLK_GET_STATE((le)->le_prop)); - asize = vdev_psize_to_asize(dev->l2ad_vdev, - L2BLK_GET_PSIZE((le)->le_prop)); /* * vdev_space_update() has to be called before arc_hdr_destroy() to @@ -10547,6 +10758,8 @@ l2arc_hdr_restore(const l2arc_log_ent_phys_t *le, l2arc_dev_t *dev) exists->b_l2hdr.b_daddr = le->le_daddr; exists->b_l2hdr.b_arcs_state = L2BLK_GET_STATE((le)->le_prop); + /* l2arc_hdr_arcstats_update() expects a valid asize */ + HDR_SET_L2SIZE(exists, asize); mutex_enter(&dev->l2ad_mtx); list_insert_tail(&dev->l2ad_buflist, exists); (void) zfs_refcount_add_many(&dev->l2ad_alloc, diff --git a/module/zfs/brt.c b/module/zfs/brt.c index c48527db7d17..7d94214143ea 100644 --- a/module/zfs/brt.c +++ b/module/zfs/brt.c @@ -642,6 +642,7 @@ brt_vdev_destroy(spa_t *spa, brt_vdev_t *brtvd, dmu_tx_t *tx) BRT_DEBUG("MOS BRT VDEV destroyed, object=%llu", (u_longlong_t)brtvd->bv_mos_brtvdev); brtvd->bv_mos_brtvdev = 0; + brtvd->bv_entcount_dirty = FALSE; snprintf(name, sizeof (name), "%s%llu", BRT_OBJECT_VDEV_PREFIX, (u_longlong_t)brtvd->bv_vdevid); @@ -1472,11 +1473,9 @@ brt_unload(spa_t *spa) spa->spa_brt_rangesize = 0; } -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_brt, , brt_zap_prefetch, INT, ZMOD_RW, "Enable prefetching of BRT ZAP entries"); ZFS_MODULE_PARAM(zfs_brt, , brt_zap_default_bs, UINT, ZMOD_RW, "BRT ZAP leaf blockshift"); ZFS_MODULE_PARAM(zfs_brt, , brt_zap_default_ibs, UINT, ZMOD_RW, "BRT ZAP indirect blockshift"); -/* END CSTYLED */ diff --git a/module/zfs/btree.c b/module/zfs/btree.c index 9c52083603f1..bff2b6c21f44 100644 --- a/module/zfs/btree.c +++ b/module/zfs/btree.c @@ -2208,8 +2208,6 @@ zfs_btree_verify(zfs_btree_t *tree) zfs_btree_verify_poison(tree); } -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs, zfs_, btree_verify_intensity, UINT, ZMOD_RW, "Enable btree verification. 
Levels above 4 require ZFS be built " "with debugging"); -/* END CSTYLED */ diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index cbd07d19a7f9..190d8ded39b0 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2921,7 +2921,7 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail) * pending clone and mark the block as uncached. This will be * as if the clone was never done. */ - if (dr && dr->dt.dl.dr_brtwrite) { + if (db->db_state == DB_NOFILL) { VERIFY(!dbuf_undirty(db, tx)); db->db_state = DB_UNCACHED; } diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c index 137fe487a997..64924bc4fa61 100644 --- a/module/zfs/ddt_zap.c +++ b/module/zfs/ddt_zap.c @@ -258,9 +258,7 @@ const ddt_ops_t ddt_zap_ops = { ddt_zap_count, }; -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW, "DDT ZAP leaf blockshift"); ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW, "DDT ZAP indirect blockshift"); -/* END CSTYLED */ diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 362415a25895..32609399b79e 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1221,6 +1221,7 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size, bufoff = offset - db->db_offset; tocpy = MIN(db->db_size - bufoff, size); + ASSERT(db->db_data != NULL); (void) memcpy(buf, (char *)db->db_data + bufoff, tocpy); offset += tocpy; @@ -1278,6 +1279,7 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size, else dmu_buf_will_dirty(db, tx); + ASSERT(db->db_data != NULL); (void) memcpy((char *)db->db_data + bufoff, buf, tocpy); if (tocpy == db->db_size) @@ -1426,6 +1428,7 @@ dmu_read_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size) bufoff = zfs_uio_offset(uio) - db->db_offset; tocpy = MIN(db->db_size - bufoff, size); + ASSERT(db->db_data != NULL); err = zfs_uio_fault_move((char *)db->db_data + bufoff, tocpy, UIO_READ, uio); @@ -1550,6 +1553,7 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) else dmu_buf_will_dirty(db, tx); + ASSERT(db->db_data != NULL); err = zfs_uio_fault_move((char *)db->db_data + bufoff, tocpy, UIO_WRITE, uio); @@ -2938,10 +2942,8 @@ ZFS_MODULE_PARAM(zfs, zfs_, per_txg_dirty_frees_percent, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, dmu_offset_next_sync, INT, ZMOD_RW, "Enable forcing txg sync to find holes"); -/* CSTYLED */ ZFS_MODULE_PARAM(zfs, , dmu_prefetch_max, UINT, ZMOD_RW, "Limit one prefetch call to this size"); -/* CSTYLED */ ZFS_MODULE_PARAM(zfs, , dmu_ddt_copies, UINT, ZMOD_RW, "Override copies= for dedup objects"); diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index 56986ea43446..344b0e3750e9 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -519,7 +519,5 @@ EXPORT_SYMBOL(dmu_object_next); EXPORT_SYMBOL(dmu_object_zapify); EXPORT_SYMBOL(dmu_object_free_zapified); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs, , dmu_object_alloc_chunk_shift, UINT, ZMOD_RW, "CPU-specific allocator grabs 2^N objects at once"); -/* END CSTYLED */ diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c index b1cd981cec1d..a33216be6ecf 100644 --- a/module/zfs/dmu_recv.c +++ b/module/zfs/dmu_recv.c @@ -3843,4 +3843,3 @@ ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, write_batch_size, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_recv, zfs_recv_, best_effort_corrective, INT, ZMOD_RW, "Ignore errors during corrective receive"); -/* END CSTYLED */ diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index 15cc2885e805..aa0434f3c722 100644 --- 
a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -818,6 +818,5 @@ MODULE_PARM_DESC(ignore_hole_birth, "Alias for send_holes_without_birth_time"); #endif -/* CSTYLED */ ZFS_MODULE_PARAM(zfs, , send_holes_without_birth_time, INT, ZMOD_RW, "Ignore hole_birth txg for zfs send"); diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 6aee7afb6954..e4895a6bcd7f 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -800,6 +800,14 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db) case THT_CLONE: if (blkid >= beginblk && blkid <= endblk) match_offset = TRUE; + /* + * They might have to increase nlevels, + * thus dirtying the new TLIBs. Or they + * might have to change the block size, + * thus dirtying the new lvl=0 blk=0. + */ + if (blkid == 0) + match_offset = TRUE; break; default: cmn_err(CE_PANIC, "bad txh_type %d", diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c index ed50f1889b59..ea593c0d86e1 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -63,8 +63,8 @@ static unsigned int zfetch_min_distance = 4 * 1024 * 1024; /* max bytes to prefetch per stream (default 64MB) */ unsigned int zfetch_max_distance = 64 * 1024 * 1024; #endif -/* max bytes to prefetch indirects for per stream (default 64MB) */ -unsigned int zfetch_max_idistance = 64 * 1024 * 1024; +/* max bytes to prefetch indirects for per stream (default 128MB) */ +unsigned int zfetch_max_idistance = 128 * 1024 * 1024; /* max request reorder distance within a stream (default 16MB) */ unsigned int zfetch_max_reorder = 16 * 1024 * 1024; /* Max log2 fraction of holes in a stream */ @@ -472,6 +472,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, zstream_t *zs; spa_t *spa = zf->zf_dnode->dn_objset->os_spa; zfs_prefetch_type_t os_prefetch = zf->zf_dnode->dn_objset->os_prefetch; + int64_t ipf_start, ipf_end; if (zfs_prefetch_disable || os_prefetch == ZFS_PREFETCH_NONE) return (NULL); @@ -571,13 +572,13 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, * This access is not part of any existing stream. Create a new * stream for it unless we are at the end of file. */ + ASSERT0P(zs); if (end_blkid < maxblkid) dmu_zfetch_stream_create(zf, end_blkid); mutex_exit(&zf->zf_lock); - if (!have_lock) - rw_exit(&zf->zf_dnode->dn_struct_rwlock); ZFETCHSTAT_BUMP(zfetchstat_misses); - return (NULL); + ipf_start = 0; + goto prescient; hit: nblks = dmu_zfetch_hit(zs, nblks); @@ -650,6 +651,7 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, pf_nblks = zs->zs_ipf_dist >> dbs; if (zs->zs_ipf_start < zs->zs_pf_end) zs->zs_ipf_start = zs->zs_pf_end; + ipf_start = zs->zs_ipf_end; if (zs->zs_ipf_end < zs->zs_pf_end + pf_nblks) zs->zs_ipf_end = zs->zs_pf_end + pf_nblks; @@ -658,8 +660,30 @@ dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks, zfs_refcount_add(&zs->zs_callers, NULL); mutex_exit(&zf->zf_lock); +prescient: + /* + * Prefetch the following indirect blocks for this access to reduce + * dbuf_hold() sync read delays in dmu_buf_hold_array_by_dnode(). + * This covers the gap during the first couple accesses when we can + * not predict the future yet, but know what is needed right now. + * It should be very rare for reads/writes to need more than one + * indirect, but more useful for cloning due to much bigger accesses. 
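+ * The data block range is converted to indirect block ids by rounding + * both ends up to a multiple of 2^epbs, the number of block pointers + * per indirect block.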
+ */ + ipf_start = MAX(ipf_start, blkid + 1); + int epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT; + ipf_start = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs; + ipf_end = P2ROUNDUP(end_blkid, 1 << epbs) >> epbs; + + int issued = 0; + for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) { + issued += dbuf_prefetch(zf->zf_dnode, 1, iblk, + ZIO_PRIORITY_SYNC_READ, ARC_FLAG_PRESCIENT_PREFETCH); + } + if (!have_lock) rw_exit(&zf->zf_dnode->dn_struct_rwlock); + if (issued) + ZFETCHSTAT_ADD(zfetchstat_io_issued, issued); return (zs); } diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 4712addf81be..629edd813fb9 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -701,13 +701,17 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, const void *tag, ZPOOL_ERRATA_ZOL_8308_ENCRYPTION; } - dsl_deadlist_open(&ds->ds_deadlist, - mos, dsl_dataset_phys(ds)->ds_deadlist_obj); - uint64_t remap_deadlist_obj = - dsl_dataset_get_remap_deadlist_object(ds); - if (remap_deadlist_obj != 0) { - dsl_deadlist_open(&ds->ds_remap_deadlist, mos, - remap_deadlist_obj); + if (err == 0) { + err = dsl_deadlist_open(&ds->ds_deadlist, + mos, dsl_dataset_phys(ds)->ds_deadlist_obj); + } + if (err == 0) { + uint64_t remap_deadlist_obj = + dsl_dataset_get_remap_deadlist_object(ds); + if (remap_deadlist_obj != 0) { + err = dsl_deadlist_open(&ds->ds_remap_deadlist, + mos, remap_deadlist_obj); + } } dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync, @@ -716,7 +720,8 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, const void *tag, winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu); if (err != 0 || winner != NULL) { - dsl_deadlist_close(&ds->ds_deadlist); + if (dsl_deadlist_is_open(&ds->ds_deadlist)) + dsl_deadlist_close(&ds->ds_deadlist); if (dsl_deadlist_is_open(&ds->ds_remap_deadlist)) dsl_deadlist_close(&ds->ds_remap_deadlist); dsl_bookmark_fini_ds(ds); @@ -1823,8 +1828,8 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); dsl_deadlist_close(&ds->ds_deadlist); - dsl_deadlist_open(&ds->ds_deadlist, mos, - dsl_dataset_phys(ds)->ds_deadlist_obj); + VERIFY0(dsl_deadlist_open(&ds->ds_deadlist, mos, + dsl_dataset_phys(ds)->ds_deadlist_obj)); dsl_deadlist_add_key(&ds->ds_deadlist, dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); dsl_bookmark_snapshotted(ds, tx); @@ -4044,14 +4049,14 @@ dsl_dataset_swap_remap_deadlists(dsl_dataset_t *clone, if (clone_remap_dl_obj != 0) { dsl_dataset_set_remap_deadlist_object(origin, clone_remap_dl_obj, tx); - dsl_deadlist_open(&origin->ds_remap_deadlist, - dp->dp_meta_objset, clone_remap_dl_obj); + VERIFY0(dsl_deadlist_open(&origin->ds_remap_deadlist, + dp->dp_meta_objset, clone_remap_dl_obj)); } if (origin_remap_dl_obj != 0) { dsl_dataset_set_remap_deadlist_object(clone, origin_remap_dl_obj, tx); - dsl_deadlist_open(&clone->ds_remap_deadlist, - dp->dp_meta_objset, origin_remap_dl_obj); + VERIFY0(dsl_deadlist_open(&clone->ds_remap_deadlist, + dp->dp_meta_objset, origin_remap_dl_obj)); } } @@ -4222,10 +4227,10 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, dsl_deadlist_close(&origin_head->ds_deadlist); SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj, dsl_dataset_phys(clone)->ds_deadlist_obj); - dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, - dsl_dataset_phys(clone)->ds_deadlist_obj); - dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, - dsl_dataset_phys(origin_head)->ds_deadlist_obj); + 
VERIFY0(dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, + dsl_dataset_phys(clone)->ds_deadlist_obj)); + VERIFY0(dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, + dsl_dataset_phys(origin_head)->ds_deadlist_obj)); dsl_dataset_swap_remap_deadlists(clone, origin_head, tx); /* @@ -4959,8 +4964,8 @@ dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx) dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); dsl_dataset_set_remap_deadlist_object(ds, remap_deadlist_obj, tx); - dsl_deadlist_open(&ds->ds_remap_deadlist, spa_meta_objset(spa), - remap_deadlist_obj); + VERIFY0(dsl_deadlist_open(&ds->ds_remap_deadlist, spa_meta_objset(spa), + remap_deadlist_obj)); spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); } diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c index e457e2fd86ef..b1be8fae3b47 100644 --- a/module/zfs/dsl_deadlist.c +++ b/module/zfs/dsl_deadlist.c @@ -299,30 +299,33 @@ dsl_deadlist_iterate(dsl_deadlist_t *dl, deadlist_iter_t func, void *args) } } -void +int dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object) { dmu_object_info_t doi; + int err; ASSERT(!dsl_deadlist_is_open(dl)); mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL); dl->dl_os = os; dl->dl_object = object; - VERIFY0(dmu_bonus_hold(os, object, dl, &dl->dl_dbuf)); + err = dmu_bonus_hold(os, object, dl, &dl->dl_dbuf); + if (err != 0) + return (err); dmu_object_info_from_db(dl->dl_dbuf, &doi); if (doi.doi_type == DMU_OT_BPOBJ) { dmu_buf_rele(dl->dl_dbuf, dl); dl->dl_dbuf = NULL; dl->dl_oldfmt = B_TRUE; - VERIFY0(bpobj_open(&dl->dl_bpobj, os, object)); - return; + return (bpobj_open(&dl->dl_bpobj, os, object)); } dl->dl_oldfmt = B_FALSE; dl->dl_phys = dl->dl_dbuf->db_data; dl->dl_havetree = B_FALSE; dl->dl_havecache = B_FALSE; + return (0); } boolean_t @@ -686,7 +689,7 @@ dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj, dsl_deadlist_t dl = { 0 }; dsl_pool_t *dp = dmu_objset_pool(os); - dsl_deadlist_open(&dl, os, dlobj); + VERIFY0(dsl_deadlist_open(&dl, os, dlobj)); if (dl.dl_oldfmt) { dsl_deadlist_close(&dl); return; diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index b2b925b135f7..e6c7e79ed6c0 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -182,10 +182,10 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, dsl_dataset_phys(ds)->ds_deadlist_obj = dsl_dataset_phys(ds_next)->ds_deadlist_obj; dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj; - dsl_deadlist_open(&ds->ds_deadlist, mos, - dsl_dataset_phys(ds)->ds_deadlist_obj); - dsl_deadlist_open(&ds_next->ds_deadlist, mos, - dsl_dataset_phys(ds_next)->ds_deadlist_obj); + VERIFY0(dsl_deadlist_open(&ds->ds_deadlist, mos, + dsl_dataset_phys(ds)->ds_deadlist_obj)); + VERIFY0(dsl_deadlist_open(&ds_next->ds_deadlist, mos, + dsl_dataset_phys(ds_next)->ds_deadlist_obj)); } typedef struct remaining_clones_key { diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 1b60fa620b8d..71f151b14d9b 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -272,9 +272,11 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, err = zap_lookup(dp->dp_meta_objset, dd->dd_object, DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj); - if (err == 0) - dsl_dir_livelist_open(dd, obj); - else if (err != ENOENT) + if (err == 0) { + err = dsl_dir_livelist_open(dd, obj); + if (err != 0) + goto errout; + } else if (err != ENOENT) goto errout; } } @@ -2301,15 +2303,18 @@ dsl_dir_is_zapified(dsl_dir_t *dd) return (doi.doi_type == DMU_OTN_ZAP_METADATA); } -void +int 
dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj) { objset_t *mos = dd->dd_pool->dp_meta_objset; ASSERT(spa_feature_is_active(dd->dd_pool->dp_spa, SPA_FEATURE_LIVELIST)); - dsl_deadlist_open(&dd->dd_livelist, mos, obj); + int err = dsl_deadlist_open(&dd->dd_livelist, mos, obj); + if (err != 0) + return (err); bplist_create(&dd->dd_pending_allocs); bplist_create(&dd->dd_pending_frees); + return (0); } void @@ -2489,6 +2494,5 @@ EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_reservation); #endif -/* CSTYLED */ ZFS_MODULE_PARAM(zfs, , zvol_enforce_quotas, INT, ZMOD_RW, "Enable strict ZVOL quota enforcement"); diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 5ae96882935c..39f97d7547c6 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -404,13 +404,21 @@ dsl_pool_close(dsl_pool_t *dp) taskq_destroy(dp->dp_zil_clean_taskq); spa_sync_tq_destroy(dp->dp_spa); - /* - * We can't set retry to TRUE since we're explicitly specifying - * a spa to flush. This is good enough; any missed buffers for - * this spa won't cause trouble, and they'll eventually fall - * out of the ARC just like any other unused buffer. - */ - arc_flush(dp->dp_spa, FALSE); + if (dp->dp_spa->spa_state == POOL_STATE_EXPORTED || + dp->dp_spa->spa_state == POOL_STATE_DESTROYED) { + /* + * On export/destroy perform the ARC flush asynchronously. + */ + arc_flush_async(dp->dp_spa); + } else { + /* + * We can't set retry to TRUE since we're explicitly specifying + * a spa to flush. This is good enough; any missed buffers for + * this spa won't cause trouble, and they'll eventually fall + * out of the ARC just like any other unused buffer. + */ + arc_flush(dp->dp_spa, FALSE); + } mmp_fini(dp->dp_spa); txg_fini(dp); diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 6cd0dbdea195..3eba4cb35cc6 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -231,6 +231,9 @@ static uint_t zfs_resilver_defer_percent = 10; ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \ (scn)->scn_phys.scn_func == POOL_SCAN_RESILVER) +#define DSL_SCAN_IS_SCRUB(scn) \ + ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB) + /* * Enable/disable the processing of the free_bpobj object. 
*/ @@ -855,15 +858,15 @@ dsl_scan_setup_check(void *arg, dmu_tx_t *tx) void dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) { - (void) arg; + setup_sync_arg_t *setup_sync_arg = (setup_sync_arg_t *)arg; dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan; - pool_scan_func_t *funcp = arg; dmu_object_type_t ot = 0; dsl_pool_t *dp = scn->scn_dp; spa_t *spa = dp->dp_spa; ASSERT(!dsl_scan_is_running(scn)); - ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS); + ASSERT3U(setup_sync_arg->func, >, POOL_SCAN_NONE); + ASSERT3U(setup_sync_arg->func, <, POOL_SCAN_FUNCS); memset(&scn->scn_phys, 0, sizeof (scn->scn_phys)); /* @@ -873,10 +876,14 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) memset(&scn->errorscrub_phys, 0, sizeof (scn->errorscrub_phys)); dsl_errorscrub_sync_state(scn, tx); - scn->scn_phys.scn_func = *funcp; + scn->scn_phys.scn_func = setup_sync_arg->func; scn->scn_phys.scn_state = DSS_SCANNING; - scn->scn_phys.scn_min_txg = 0; - scn->scn_phys.scn_max_txg = tx->tx_txg; + scn->scn_phys.scn_min_txg = setup_sync_arg->txgstart; + if (setup_sync_arg->txgend == 0) { + scn->scn_phys.scn_max_txg = tx->tx_txg; + } else { + scn->scn_phys.scn_max_txg = setup_sync_arg->txgend; + } scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */ scn->scn_phys.scn_start_time = gethrestime_sec(); scn->scn_phys.scn_errors = 0; @@ -963,7 +970,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) spa_history_log_internal(spa, "scan setup", tx, "func=%u mintxg=%llu maxtxg=%llu", - *funcp, (u_longlong_t)scn->scn_phys.scn_min_txg, + setup_sync_arg->func, (u_longlong_t)scn->scn_phys.scn_min_txg, (u_longlong_t)scn->scn_phys.scn_max_txg); } @@ -973,10 +980,16 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) * error scrub. */ int -dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) +dsl_scan(dsl_pool_t *dp, pool_scan_func_t func, uint64_t txgstart, + uint64_t txgend) { spa_t *spa = dp->dp_spa; dsl_scan_t *scn = dp->dp_scan; + setup_sync_arg_t setup_sync_arg; + + if (func != POOL_SCAN_SCRUB && (txgstart != 0 || txgend != 0)) { + return (EINVAL); + } /* * Purge all vdev caches and probe all devices. 
We do this here @@ -1027,8 +1040,13 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func) return (SET_ERROR(err)); } + setup_sync_arg.func = func; + setup_sync_arg.txgstart = txgstart; + setup_sync_arg.txgend = txgend; + return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check, - dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); + dsl_scan_setup_sync, &setup_sync_arg, 0, + ZFS_SPACE_CHECK_EXTRA_RESERVED)); } static void @@ -1116,15 +1134,24 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) spa_notify_waiters(spa); - if (dsl_scan_restarting(scn, tx)) + if (dsl_scan_restarting(scn, tx)) { spa_history_log_internal(spa, "scan aborted, restarting", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); - else if (!complete) + } else if (!complete) { spa_history_log_internal(spa, "scan cancelled", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); - else + } else { spa_history_log_internal(spa, "scan done", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); + if (DSL_SCAN_IS_SCRUB(scn)) { + VERIFY0(zap_update(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_LAST_SCRUBBED_TXG, + sizeof (uint64_t), 1, + &scn->scn_phys.scn_max_txg, tx)); + spa->spa_scrubbed_last_txg = scn->scn_phys.scn_max_txg; + } + } if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { spa->spa_scrub_active = B_FALSE; @@ -4330,14 +4357,18 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) * current scan progress is below zfs_resilver_defer_percent. */ if (dsl_scan_restarting(scn, tx) || restart_early) { - pool_scan_func_t func = POOL_SCAN_SCRUB; + setup_sync_arg_t setup_sync_arg = { + .func = POOL_SCAN_SCRUB, + .txgstart = 0, + .txgend = 0, + }; dsl_scan_done(scn, B_FALSE, tx); if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) - func = POOL_SCAN_RESILVER; + setup_sync_arg.func = POOL_SCAN_RESILVER; zfs_dbgmsg("restarting scan func=%u on %s txg=%llu early=%d", - func, dp->dp_spa->spa_name, (longlong_t)tx->tx_txg, - restart_early); - dsl_scan_setup_sync(&func, tx); + setup_sync_arg.func, dp->dp_spa->spa_name, + (longlong_t)tx->tx_txg, restart_early); + dsl_scan_setup_sync(&setup_sync_arg, tx); } /* @@ -5314,4 +5345,3 @@ ZFS_MODULE_PARAM(zfs, zfs_, resilver_defer_percent, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scrub_error_blocks_per_txg, UINT, ZMOD_RW, "Error blocks to be scrubbed in one txg"); -/* END CSTYLED */ diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 3bd6e93e93a4..7affbfac9dc7 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -6226,7 +6226,6 @@ ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, unload_delay_ms, UINT, ZMOD_RW, "Delay in milliseconds after metaslab was last used before unloading"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_mg, zfs_mg_, noalloc_threshold, UINT, ZMOD_RW, "Percentage of metaslab group size that should be free to make it " "eligible for allocation"); @@ -6239,7 +6238,6 @@ ZFS_MODULE_PARAM(zfs_mg, zfs_mg_, fragmentation_threshold, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, fragmentation_factor_enabled, INT, ZMOD_RW, "Use the fragmentation metric to prefer less fragmented metaslabs"); -/* END CSTYLED */ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, fragmentation_threshold, UINT, ZMOD_RW, "Fragmentation for metaslab to allow allocation"); @@ -6280,8 +6278,6 @@ ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, try_hard_before_gang, INT, ZFS_MODULE_PARAM(zfs_metaslab, zfs_metaslab_, find_max_tries, UINT, ZMOD_RW, "Normally only 
consider this many of the best metaslabs in each vdev"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs, zfs_, active_allocator, param_set_active_allocator, param_get_charp, ZMOD_RW, "SPA active allocator"); -/* END CSTYLED */ diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index 71122542758d..493884cf04c4 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -736,11 +736,9 @@ mmp_signal_all_threads(void) mutex_exit(&spa_namespace_lock); } -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs_multihost, zfs_multihost_, interval, param_set_multihost_interval, spl_param_get_u64, ZMOD_RW, "Milliseconds between mmp writes to each leaf"); -/* END CSTYLED */ ZFS_MODULE_PARAM(zfs_multihost, zfs_multihost_, fail_intervals, UINT, ZMOD_RW, "Max allowed period without a successful mmp write"); diff --git a/module/zfs/refcount.c b/module/zfs/refcount.c index 718bbb34a8d5..0dd7da1aa197 100644 --- a/module/zfs/refcount.c +++ b/module/zfs/refcount.c @@ -349,11 +349,9 @@ EXPORT_SYMBOL(zfs_refcount_add); EXPORT_SYMBOL(zfs_refcount_remove); EXPORT_SYMBOL(zfs_refcount_held); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs, , reference_tracking_enable, INT, ZMOD_RW, "Track reference holders to refcount_t objects"); ZFS_MODULE_PARAM(zfs, , reference_history, UINT, ZMOD_RW, "Maximum reference holders being tracked"); -/* END CSTYLED */ #endif /* ZFS_DEBUG */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index bf3b0d143db4..b83c982c13fd 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -451,9 +451,10 @@ spa_prop_get_config(spa_t *spa, nvlist_t *nv) spa_prop_add_list(nv, ZPOOL_PROP_DEDUP_TABLE_SIZE, NULL, ddt_get_ddt_dsize(spa), src); - spa_prop_add_list(nv, ZPOOL_PROP_HEALTH, NULL, rvd->vdev_state, src); + spa_prop_add_list(nv, ZPOOL_PROP_LAST_SCRUBBED_TXG, NULL, + spa_get_last_scrubbed_txg(spa), src); version = spa_version(spa); if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) { @@ -3065,7 +3066,7 @@ spa_livelist_delete_cb(void *arg, zthr_t *z) dsl_deadlist_entry_t *dle; bplist_t to_free; ll = kmem_zalloc(sizeof (dsl_deadlist_t), KM_SLEEP); - dsl_deadlist_open(ll, mos, ll_obj); + VERIFY0(dsl_deadlist_open(ll, mos, ll_obj)); dle = dsl_deadlist_first(ll); ASSERT3P(dle, !=, NULL); bplist_create(&to_free); @@ -4727,6 +4728,12 @@ spa_ld_get_props(spa_t *spa) if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* Load the last scrubbed txg. */ + error = spa_dir_prop(spa, DMU_POOL_LAST_SCRUBBED_TXG, + &spa->spa_scrubbed_last_txg, B_FALSE); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* * Load the livelist deletion field. If a livelist is queued for * deletion, indicate that in the spa @@ -8869,6 +8876,13 @@ spa_scan_stop(spa_t *spa) int spa_scan(spa_t *spa, pool_scan_func_t func) +{ + return (spa_scan_range(spa, func, 0, 0)); +} + +int +spa_scan_range(spa_t *spa, pool_scan_func_t func, uint64_t txgstart, + uint64_t txgend) { ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0); @@ -8879,6 +8893,9 @@ spa_scan(spa_t *spa, pool_scan_func_t func) !spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER)) return (SET_ERROR(ENOTSUP)); + if (func != POOL_SCAN_SCRUB && (txgstart != 0 || txgend != 0)) + return (SET_ERROR(ENOTSUP)); + /* * If a resilver was requested, but there is no DTL on a * writeable leaf device, we have nothing to do. 
@@ -8893,7 +8910,7 @@ spa_scan(spa_t *spa, pool_scan_func_t func) !spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) return (SET_ERROR(ENOTSUP)); - return (dsl_scan(spa->spa_dsl_pool, func)); + return (dsl_scan(spa->spa_dsl_pool, func, txgstart, txgend)); } /* @@ -10976,6 +10993,7 @@ EXPORT_SYMBOL(spa_l2cache_drop); /* scanning */ EXPORT_SYMBOL(spa_scan); +EXPORT_SYMBOL(spa_scan_range); EXPORT_SYMBOL(spa_scan_stop); /* spa syncing */ @@ -10993,11 +11011,9 @@ EXPORT_SYMBOL(spa_event_notify); ZFS_MODULE_PARAM(zfs_metaslab, metaslab_, preload_pct, UINT, ZMOD_RW, "Percentage of CPUs to run a metaslab preload taskq"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_shift, UINT, ZMOD_RW, "log2 fraction of arc that can be used by inflight I/Os when " "verifying pool during import"); -/* END CSTYLED */ ZFS_MODULE_PARAM(zfs_spa, spa_, load_verify_metadata, INT, ZMOD_RW, "Set to traverse metadata on pool import"); @@ -11014,11 +11030,9 @@ ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_pct, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_batch_tpq, UINT, ZMOD_RW, "Number of threads per IO worker taskqueue"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs, zfs_, max_missing_tvds, U64, ZMOD_RW, "Allow importing pool with up to this number of missing top-level " "vdevs (in read-only mode)"); -/* END CSTYLED */ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_pause, INT, ZMOD_RW, "Set the livelist condense zthr to pause"); @@ -11026,7 +11040,6 @@ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, zthr_pause, INT, ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, sync_pause, INT, ZMOD_RW, "Set the livelist condense synctask to pause"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_livelist_condense, zfs_livelist_condense_, sync_cancel, INT, ZMOD_RW, "Whether livelist condensing was canceled in the synctask"); @@ -11048,7 +11061,6 @@ ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs_zio, zio_, taskq_write, spa_taskq_write_param_set, spa_taskq_write_param_get, ZMOD_RW, "Configure IO queues for write IO"); #endif -/* END CSTYLED */ ZFS_MODULE_PARAM(zfs_zio, zio_, taskq_write_tpq, UINT, ZMOD_RW, "Number of CPUs per write issue taskq"); diff --git a/module/zfs/spa_checkpoint.c b/module/zfs/spa_checkpoint.c index 1efff47f87a0..4c3721c159be 100644 --- a/module/zfs/spa_checkpoint.c +++ b/module/zfs/spa_checkpoint.c @@ -633,8 +633,6 @@ EXPORT_SYMBOL(spa_checkpoint_get_stats); EXPORT_SYMBOL(spa_checkpoint_discard_thread); EXPORT_SYMBOL(spa_checkpoint_discard_thread_check); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_spa, zfs_spa_, discard_memory_limit, U64, ZMOD_RW, "Limit for memory used in prefetching the checkpoint space map done " "on each vdev while discarding the checkpoint"); -/* END CSTYLED */ diff --git a/module/zfs/spa_errlog.c b/module/zfs/spa_errlog.c index a49e28ee7a43..18b3970ac0dc 100644 --- a/module/zfs/spa_errlog.c +++ b/module/zfs/spa_errlog.c @@ -1491,8 +1491,6 @@ EXPORT_SYMBOL(zep_to_zb); EXPORT_SYMBOL(name_to_errphys); #endif -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_spa, spa_, upgrade_errlog_limit, UINT, ZMOD_RW, "Limit the number of errors which will be upgraded to the new " "on-disk error log when enabling head_errlog"); -/* END CSTYLED */ diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index de036d6c3718..81bb99eb2ccd 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -390,6 +390,9 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) return (err); } + ASSERT3UF(tx->tx_txg, <=, spa_final_dirty_txg(spa), + "Logged 
%s after final txg was set!", "nvlist"); + VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP)); if (spa_history_zone() != NULL) { fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, @@ -527,6 +530,9 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, return; } + ASSERT3UF(tx->tx_txg, <=, spa_final_dirty_txg(spa), + "Logged after final txg was set: %s %s", operation, fmt); + msg = kmem_vasprintf(fmt, adx); fnvlist_add_string(nvl, ZPOOL_HIST_INT_STR, msg); kmem_strfree(msg); diff --git a/module/zfs/spa_log_spacemap.c b/module/zfs/spa_log_spacemap.c index f55218e3579b..a95152608578 100644 --- a/module/zfs/spa_log_spacemap.c +++ b/module/zfs/spa_log_spacemap.c @@ -1364,7 +1364,6 @@ spa_ld_log_spacemaps(spa_t *spa) return (error); } -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs, zfs_, unflushed_max_mem_amt, U64, ZMOD_RW, "Specific hard-limit in memory that ZFS allows to be used for " "unflushed changes"); @@ -1383,8 +1382,8 @@ ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_min, U64, ZMOD_RW, "log spacemap (see zfs_unflushed_log_block_max)"); ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_txg_max, U64, ZMOD_RW, - "Hard limit (upper-bound) in the size of the space map log " - "in terms of dirty TXGs."); + "Hard limit (upper-bound) in the size of the space map log " + "in terms of dirty TXGs."); ZFS_MODULE_PARAM(zfs, zfs_, unflushed_log_block_pct, UINT, ZMOD_RW, "Tunable used to determine the number of blocks that can be used for " @@ -1399,7 +1398,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_log_walking, U64, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, keep_log_spacemaps_at_export, INT, ZMOD_RW, "Prevent the log spacemaps from being flushed and destroyed " "during pool export/destroy"); -/* END CSTYLED */ ZFS_MODULE_PARAM(zfs, zfs_, max_logsm_summary_length, U64, ZMOD_RW, "Maximum number of rows allowed in the summary of the spacemap log"); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 32542e7ce701..7fae51cc2c52 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1588,6 +1588,34 @@ spa_generate_guid(spa_t *spa) return (guid); } +static boolean_t +spa_load_guid_exists(uint64_t guid) +{ + avl_tree_t *t = &spa_namespace_avl; + + ASSERT(MUTEX_HELD(&spa_namespace_lock)); + + for (spa_t *spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { + if (spa_load_guid(spa) == guid) + return (B_TRUE); + } + + return (arc_async_flush_guid_inuse(guid)); +} + +uint64_t +spa_generate_load_guid(void) +{ + uint64_t guid; + + do { + (void) random_get_pseudo_bytes((void *)&guid, + sizeof (guid)); + } while (guid == 0 || spa_load_guid_exists(guid)); + + return (guid); +} + void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp) { @@ -2676,6 +2704,12 @@ spa_mode(spa_t *spa) return (spa->spa_mode); } +uint64_t +spa_get_last_scrubbed_txg(spa_t *spa) +{ + return (spa->spa_scrubbed_last_txg); +} + uint64_t spa_bootfs(spa_t *spa) { @@ -3117,7 +3151,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, ddt_data_is_special, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, user_indirect_is_special, INT, ZMOD_RW, "Place user data indirect blocks into the special class"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, failmode, param_set_deadman_failmode, param_get_charp, ZMOD_RW, "Failmode for deadman timer"); @@ -3133,7 +3166,6 @@ ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, ziotime_ms, ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, UINT, ZMOD_RW, "Small file blocks in special vdevs depends on this much " "free space available"); -/* END CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, 
slop_shift, param_set_slop_shift, param_get_uint, ZMOD_RW, "Reserved free space in pool"); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 983f444d79b0..250590f062ea 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -647,7 +647,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) if (spa->spa_root_vdev == NULL) { ASSERT(ops == &vdev_root_ops); spa->spa_root_vdev = vd; - spa->spa_load_guid = spa_generate_guid(NULL); + spa->spa_load_guid = spa_generate_load_guid(); } if (guid == 0 && ops != &vdev_hole_ops) { @@ -6551,7 +6551,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, deadman_events_per_second, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, dio_write_verify_events_per_second, UINT, ZMOD_RW, "Rate Direct I/O write verify events to this many per second"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, direct_write_verify, UINT, ZMOD_RW, "Direct I/O writes will perform checksum verification before " "committing write"); @@ -6559,7 +6558,6 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, direct_write_verify, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, checksum_events_per_second, UINT, ZMOD_RW, "Rate limit checksum events to this many checksum errors per second " "(do not set below ZED threshold)."); -/* END CSTYLED */ ZFS_MODULE_PARAM(zfs, zfs_, scan_ignore_errors, INT, ZMOD_RW, "Ignore errors during resilver/scrub"); @@ -6573,7 +6571,6 @@ ZFS_MODULE_PARAM(zfs, zfs_, nocacheflush, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, embedded_slog_min_ms, UINT, ZMOD_RW, "Minimum number of metaslabs required to dedicate one for log blocks"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, min_auto_ashift, param_set_min_auto_ashift, param_get_uint, ZMOD_RW, "Minimum ashift used when creating new top-level vdevs"); @@ -6582,4 +6579,3 @@ ZFS_MODULE_PARAM_CALL(zfs_vdev, zfs_vdev_, max_auto_ashift, param_set_max_auto_ashift, param_get_uint, ZMOD_RW, "Maximum ashift used when optimizing for logical -> physical sector " "size on new top-level vdevs"); -/* END CSTYLED */ diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index e3dba0257b21..cd24f97ae7cd 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1897,7 +1897,6 @@ EXPORT_SYMBOL(vdev_indirect_sync_obsolete); EXPORT_SYMBOL(vdev_obsolete_counts_are_precise); EXPORT_SYMBOL(vdev_obsolete_sm_object); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_vdevs_enable, INT, ZMOD_RW, "Whether to attempt condensing indirect vdev mappings"); @@ -1922,4 +1921,3 @@ ZFS_MODULE_PARAM(zfs_condense, zfs_condense_, indirect_commit_entry_delay_ms, ZFS_MODULE_PARAM(zfs_reconstruct, zfs_reconstruct_, indirect_combinations_max, UINT, ZMOD_RW, "Maximum number of combinations when reconstructing split segments"); -/* END CSTYLED */ diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 65a840bf9728..850569d1a35e 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -1047,12 +1047,10 @@ ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, rotating_inc, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, rotating_seek_inc, INT, ZMOD_RW, "Rotating media load increment for seeking I/Os"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, rotating_seek_offset, INT, ZMOD_RW, "Offset in bytes from the last I/O which triggers " "a reduced rotating media seek increment"); -/* END CSTYLED */ ZFS_MODULE_PARAM(zfs_vdev_mirror, zfs_vdev_mirror_, non_rotating_inc, INT, ZMOD_RW, "Non-rotating media load increment 
for non-seeking I/Os"); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 5e330626be2b..e4487c485075 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -357,7 +357,11 @@ uint_t raidz_expand_pause_point = 0; /* * Maximum amount of copy io's outstanding at once. */ +#ifdef _ILP32 +static unsigned long raidz_expand_max_copy_bytes = SPA_MAXBLOCKSIZE; +#else static unsigned long raidz_expand_max_copy_bytes = 10 * SPA_MAXBLOCKSIZE; +#endif /* * Apply raidz map abds aggregation if the number of rows in the map is equal @@ -3811,22 +3815,33 @@ raidz_reflow_complete_sync(void *arg, dmu_tx_t *tx) * setup a scrub. All the data has been sucessfully copied * but we have not validated any checksums. */ - pool_scan_func_t func = POOL_SCAN_SCRUB; - if (zfs_scrub_after_expand && dsl_scan_setup_check(&func, tx) == 0) - dsl_scan_setup_sync(&func, tx); + setup_sync_arg_t setup_sync_arg = { + .func = POOL_SCAN_SCRUB, + .txgstart = 0, + .txgend = 0, + }; + if (zfs_scrub_after_expand && + dsl_scan_setup_check(&setup_sync_arg.func, tx) == 0) { + dsl_scan_setup_sync(&setup_sync_arg, tx); + } } /* - * Struct for one copy zio. + * State of one copy batch. */ typedef struct raidz_reflow_arg { - vdev_raidz_expand_t *rra_vre; - zfs_locked_range_t *rra_lr; - uint64_t rra_txg; + vdev_raidz_expand_t *rra_vre; /* Global expantion state. */ + zfs_locked_range_t *rra_lr; /* Range lock of this batch. */ + uint64_t rra_txg; /* TXG of this batch. */ + uint_t rra_ashift; /* Ashift of the vdev. */ + uint32_t rra_tbd; /* Number of in-flight ZIOs. */ + uint32_t rra_writes; /* Number of write ZIOs. */ + zio_t *rra_zio[]; /* Write ZIO pointers. */ } raidz_reflow_arg_t; /* - * The write of the new location is done. + * Write of the new location on one child is done. Once all of them are done + * we can unlock and free everything. */ static void raidz_reflow_write_done(zio_t *zio) @@ -3850,17 +3865,19 @@ raidz_reflow_write_done(zio_t *zio) zio->io_size; } cv_signal(&vre->vre_cv); + boolean_t done = (--rra->rra_tbd == 0); mutex_exit(&vre->vre_lock); - zfs_rangelock_exit(rra->rra_lr); - - kmem_free(rra, sizeof (*rra)); + if (!done) + return; spa_config_exit(zio->io_spa, SCL_STATE, zio->io_spa); + zfs_rangelock_exit(rra->rra_lr); + kmem_free(rra, sizeof (*rra) + sizeof (zio_t *) * rra->rra_writes); } /* - * The read of the old location is done. The parent zio is the write to - * the new location. Allow it to start. + * Read of the old location on one child is done. Once all of them are done + * writes should have all the data and we can issue them. */ static void raidz_reflow_read_done(zio_t *zio) @@ -3868,6 +3885,10 @@ raidz_reflow_read_done(zio_t *zio) raidz_reflow_arg_t *rra = zio->io_private; vdev_raidz_expand_t *vre = rra->rra_vre; + /* Reads of only one block use write ABDs. For bigger free gangs. */ + if (zio->io_size > (1 << rra->rra_ashift)) + abd_free(zio->io_abd); + /* * If the read failed, or if it was done on a vdev that is not fully * healthy (e.g. 
a child that has a resilver in progress), we may not @@ -3891,7 +3912,11 @@ mutex_exit(&vre->vre_lock); } - zio_nowait(zio_unique_parent(zio)); + if (atomic_dec_32_nv(&rra->rra_tbd) > 0) + return; + rra->rra_tbd = rra->rra_writes; + for (uint64_t i = 0; i < rra->rra_writes; i++) + zio_nowait(rra->rra_zio[i]); } static void @@ -3932,21 +3957,19 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, dmu_tx_t *tx) { spa_t *spa = vd->vdev_spa; - int ashift = vd->vdev_top->vdev_ashift; - uint64_t offset, size; + uint_t ashift = vd->vdev_top->vdev_ashift; - if (!range_tree_find_in(rt, 0, vd->vdev_top->vdev_asize, - &offset, &size)) { + range_seg_t *rs = range_tree_first(rt); + if (rs == NULL) return (B_FALSE); - } + uint64_t offset = rs_get_start(rs, rt); ASSERT(IS_P2ALIGNED(offset, 1 << ashift)); + uint64_t size = rs_get_end(rs, rt) - offset; ASSERT3U(size, >=, 1 << ashift); - uint64_t length = 1 << ashift; - int txgoff = dmu_tx_get_txg(tx) & TXG_MASK; + ASSERT(IS_P2ALIGNED(size, 1 << ashift)); uint64_t blkid = offset >> ashift; - - int old_children = vd->vdev_children - 1; + uint_t old_children = vd->vdev_children - 1; /* * We can only progress to the point that writes will not overlap @@ -3965,26 +3988,34 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, uint64_t next_overwrite_blkid = ubsync_blkid + ubsync_blkid / old_children - old_children; VERIFY3U(next_overwrite_blkid, >, ubsync_blkid); - if (blkid >= next_overwrite_blkid) { raidz_reflow_record_progress(vre, next_overwrite_blkid << ashift, tx); return (B_TRUE); } - range_tree_remove(rt, offset, length); + size = MIN(size, raidz_expand_max_copy_bytes); + size = MIN(size, (uint64_t)old_children * + MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE)); + size = MAX(size, 1 << ashift); + uint_t blocks = MIN(size >> ashift, next_overwrite_blkid - blkid); + size = (uint64_t)blocks << ashift; + + range_tree_remove(rt, offset, size); - raidz_reflow_arg_t *rra = kmem_zalloc(sizeof (*rra), KM_SLEEP); + uint_t reads = MIN(blocks, old_children); + uint_t writes = MIN(blocks, vd->vdev_children); + raidz_reflow_arg_t *rra = kmem_zalloc(sizeof (*rra) + + sizeof (zio_t *) * writes, KM_SLEEP); rra->rra_vre = vre; rra->rra_lr = zfs_rangelock_enter(&vre->vre_rangelock, - offset, length, RL_WRITER); + offset, size, RL_WRITER); rra->rra_txg = dmu_tx_get_txg(tx); + rra->rra_ashift = ashift; + rra->rra_tbd = reads; + rra->rra_writes = writes; - raidz_reflow_record_progress(vre, offset + length, tx); - - mutex_enter(&vre->vre_lock); - vre->vre_outstanding_bytes += length; - mutex_exit(&vre->vre_lock); + raidz_reflow_record_progress(vre, offset + size, tx); /* * SCL_STATE will be released when the read and write are done, @@ -4006,29 +4037,61 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, mutex_exit(&vre->vre_lock); /* drop everything we acquired */ - zfs_rangelock_exit(rra->rra_lr); - kmem_free(rra, sizeof (*rra)); spa_config_exit(spa, SCL_STATE, spa); + zfs_rangelock_exit(rra->rra_lr); + kmem_free(rra, sizeof (*rra) + sizeof (zio_t *) * writes); return (B_TRUE); } + mutex_enter(&vre->vre_lock); + vre->vre_outstanding_bytes += size; + mutex_exit(&vre->vre_lock); +
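[Editor's note] The two loops that follow distribute one batch round-robin across children: logical block blkid+i is written to child (blkid+i) % vdev_children at row (blkid+i) / vdev_children, and read from old child (blkid+i) % old_children at the corresponding old row. A stand-alone sketch of that arithmetic (values invented, not ZFS code):

#include <stdio.h>

int
main(void)
{
	unsigned blkid = 7;		/* first logical block of the batch */
	unsigned blocks = 6;		/* batch size in blocks */
	unsigned children = 5;		/* child vdevs after expansion */
	unsigned old_children = 4;	/* child vdevs before expansion */

	for (unsigned i = 0; i < blocks; i++) {
		unsigned b = blkid + i;
		printf("block %2u: read child %u row %u -> "
		    "write child %u row %u\n", b,
		    b % old_children, b / old_children,
		    b % children, b / children);
	}
	return (0);
}

In the patch the same indices select the child vdev and the byte offset (row << ashift) for each ZIO; each child's blocks land in contiguous rows, which is what lets a multi-block read be assembled from slices of the write ABDs below.

+	/* Allocate ABD and ZIO for each child we write. 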
*/ + int txgoff = dmu_tx_get_txg(tx) & TXG_MASK; zio_t *pio = spa->spa_txg_zio[txgoff]; - abd_t *abd = abd_alloc_for_io(length, B_FALSE); - zio_t *write_zio = zio_vdev_child_io(pio, NULL, - vd->vdev_child[blkid % vd->vdev_children], - (blkid / vd->vdev_children) << ashift, - abd, length, - ZIO_TYPE_WRITE, ZIO_PRIORITY_REMOVAL, - ZIO_FLAG_CANFAIL, - raidz_reflow_write_done, rra); - - zio_nowait(zio_vdev_child_io(write_zio, NULL, - vd->vdev_child[blkid % old_children], - (blkid / old_children) << ashift, - abd, length, - ZIO_TYPE_READ, ZIO_PRIORITY_REMOVAL, - ZIO_FLAG_CANFAIL, - raidz_reflow_read_done, rra)); + uint_t b = blocks / vd->vdev_children; + uint_t bb = blocks % vd->vdev_children; + for (uint_t i = 0; i < writes; i++) { + uint_t n = b + (i < bb); + abd_t *abd = abd_alloc_for_io(n << ashift, B_FALSE); + rra->rra_zio[i] = zio_vdev_child_io(pio, NULL, + vd->vdev_child[(blkid + i) % vd->vdev_children], + ((blkid + i) / vd->vdev_children) << ashift, + abd, n << ashift, ZIO_TYPE_WRITE, ZIO_PRIORITY_REMOVAL, + ZIO_FLAG_CANFAIL, raidz_reflow_write_done, rra); + } + + /* + * Allocate and issue a ZIO for each child we read. Reads of only + * one block can reuse the respective writer ABDs, since those also + * have only one block. For bigger reads, create gang ABDs and fill + * them with the respective blocks from the writer ABDs. + */ + b = blocks / old_children; + bb = blocks % old_children; + for (uint_t i = 0; i < reads; i++) { + uint_t n = b + (i < bb); + abd_t *abd; + if (n > 1) { + abd = abd_alloc_gang(); + for (uint_t j = 0; j < n; j++) { + uint_t b = j * old_children + i; + abd_t *cabd = abd_get_offset_size( + rra->rra_zio[b % vd->vdev_children]->io_abd, + (b / vd->vdev_children) << ashift, + 1 << ashift); + abd_gang_add(abd, cabd, B_TRUE); + } + } else { + abd = rra->rra_zio[i]->io_abd; + } + zio_nowait(zio_vdev_child_io(pio, NULL, + vd->vdev_child[(blkid + i) % old_children], + ((blkid + i) / old_children) << ashift, abd, + n << ashift, ZIO_TYPE_READ, ZIO_PRIORITY_REMOVAL, + ZIO_FLAG_CANFAIL, raidz_reflow_read_done, rra)); + } return (B_FALSE); } @@ -4122,7 +4185,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) zio_nowait(zio_vdev_child_io(pio, NULL, raidvd->vdev_child[i], VDEV_BOOT_OFFSET - VDEV_LABEL_START_SIZE, abds[i], - write_size, ZIO_TYPE_READ, ZIO_PRIORITY_ASYNC_READ, + write_size, ZIO_TYPE_READ, ZIO_PRIORITY_REMOVAL, ZIO_FLAG_CANFAIL, raidz_scratch_child_done, pio)); } error = zio_wait(pio); @@ -4142,7 +4205,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) ASSERT0(vdev_is_dead(raidvd->vdev_child[i])); zio_nowait(zio_vdev_child_io(pio, NULL, raidvd->vdev_child[i], 0, abds[i], read_size, ZIO_TYPE_READ, - ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, + ZIO_PRIORITY_REMOVAL, ZIO_FLAG_CANFAIL, raidz_scratch_child_done, pio)); } error = zio_wait(pio); @@ -4197,7 +4260,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) */ zio_nowait(zio_vdev_child_io(pio, NULL, raidvd->vdev_child[i], VDEV_BOOT_OFFSET - VDEV_LABEL_START_SIZE, abds[i], - write_size, ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, + write_size, ZIO_TYPE_WRITE, ZIO_PRIORITY_REMOVAL, ZIO_FLAG_CANFAIL, raidz_scratch_child_done, pio)); } error = zio_wait(pio); @@ -4246,7 +4309,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) for (int i = 0; i < raidvd->vdev_children; i++) { zio_nowait(zio_vdev_child_io(pio, NULL, raidvd->vdev_child[i], 0, abds[i], write_size, ZIO_TYPE_WRITE, - ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, + ZIO_PRIORITY_REMOVAL, ZIO_FLAG_CANFAIL, raidz_scratch_child_done, pio)); } error = 
zio_wait(pio); @@ -4355,8 +4418,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa) */ zio_nowait(zio_vdev_child_io(pio, NULL, raidvd->vdev_child[i], VDEV_BOOT_OFFSET - VDEV_LABEL_START_SIZE, abds[i], - write_size, ZIO_TYPE_READ, - ZIO_PRIORITY_ASYNC_READ, 0, + write_size, ZIO_TYPE_READ, ZIO_PRIORITY_REMOVAL, 0, raidz_scratch_child_done, pio)); } zio_wait(pio); @@ -4368,7 +4430,7 @@ vdev_raidz_reflow_copy_scratch(spa_t *spa) for (int i = 0; i < raidvd->vdev_children; i++) { zio_nowait(zio_vdev_child_io(pio, NULL, raidvd->vdev_child[i], 0, abds[i], write_size, ZIO_TYPE_WRITE, - ZIO_PRIORITY_ASYNC_WRITE, 0, + ZIO_PRIORITY_REMOVAL, 0, raidz_scratch_child_done, pio)); } zio_wait(pio); @@ -4490,8 +4552,11 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr) * space. Note that there may be a little bit more free * space (e.g. in ms_defer), and it's fine to copy that too. */ - range_tree_t *rt = range_tree_create(NULL, RANGE_SEG64, - NULL, 0, 0); + uint64_t shift, start; + range_seg_type_t type = metaslab_calculate_range_tree_type( + raidvd, msp, &start, &shift); + range_tree_t *rt = range_tree_create(NULL, type, NULL, + start, shift); range_tree_add(rt, msp->ms_start, msp->ms_size); range_tree_walk(msp->ms_allocatable, range_tree_remove, rt); mutex_exit(&msp->ms_lock); @@ -4516,7 +4581,10 @@ spa_raidz_expand_thread(void *arg, zthr_t *zthr) * when importing a pool with a expansion in progress), * discard any state that we have already processed. */ - range_tree_clear(rt, 0, vre->vre_offset); + if (vre->vre_offset > msp->ms_start) { + range_tree_clear(rt, msp->ms_start, + vre->vre_offset - msp->ms_start); + } while (!zthr_iscancelled(zthr) && !range_tree_is_empty(rt) && @@ -5043,7 +5111,6 @@ vdev_ops_t vdev_raidz_ops = { .vdev_op_leaf = B_FALSE /* not a leaf vdev */ }; -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_vdev, raidz_, expand_max_reflow_bytes, ULONG, ZMOD_RW, "For testing, pause RAIDZ expansion after reflowing this many bytes"); ZFS_MODULE_PARAM(zfs_vdev, raidz_, expand_max_copy_bytes, ULONG, ZMOD_RW, @@ -5053,4 +5120,3 @@ ZFS_MODULE_PARAM(zfs_vdev, raidz_, io_aggregate_rows, ULONG, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scrub_after_expand, INT, ZMOD_RW, "For expanded RAIDZ, automatically start a pool scrub when expansion " "completes"); -/* END CSTYLED */
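[Editor's note] On the spa_raidz_expand_thread hunk above: the tree used to be created as a plain RANGE_SEG64 tree based at offset 0, so clearing [0, vre_offset) was always representable. With metaslab_calculate_range_tree_type() the tree may be a compressed segment type whose entries are stored relative to the metaslab's start and scaled by a shift, which is why the clear is now bounded to [ms_start, vre_offset). A sketch of the encoding constraint this guards against (invented names, not the ZFS implementation):

#include <assert.h>
#include <stdint.h>

/*
 * A compressed range segment stores (off - base) >> shift in 32 bits;
 * an offset below base is not representable, so operations must be
 * clamped to the tree's base, as the patch does with ms_start.
 */
static uint32_t
seg_encode(uint64_t off, uint64_t base, unsigned shift)
{
	assert(off >= base);
	return ((uint32_t)((off - base) >> shift));
}

static uint64_t
seg_decode(uint32_t seg, uint64_t base, unsigned shift)
{
	return (base + ((uint64_t)seg << shift));
}

diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c index 8a8b02cab5c6..f80ed1b401f9 100644 --- a/module/zfs/vdev_rebuild.c +++ b/module/zfs/vdev_rebuild.c @@ -345,10 +345,14 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx) * While we're in syncing context take the opportunity to * setup the scrub when there are no more active rebuilds. 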
*/ - pool_scan_func_t func = POOL_SCAN_SCRUB; - if (dsl_scan_setup_check(&func, tx) == 0 && + setup_sync_arg_t setup_sync_arg = { + .func = POOL_SCAN_SCRUB, + .txgstart = 0, + .txgend = 0, + }; + if (dsl_scan_setup_check(&setup_sync_arg.func, tx) == 0 && zfs_rebuild_scrub_enabled) { - dsl_scan_setup_sync(&func, tx); + dsl_scan_setup_sync(&setup_sync_arg, tx); } cv_broadcast(&vd->vdev_rebuild_cv); diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c index 1249657f9d72..08c85a874803 100644 --- a/module/zfs/vdev_removal.c +++ b/module/zfs/vdev_removal.c @@ -2551,11 +2551,9 @@ ZFS_MODULE_PARAM(zfs_vdev, zfs_, remove_max_segment, UINT, ZMOD_RW, ZFS_MODULE_PARAM(zfs_vdev, vdev_, removal_max_span, UINT, ZMOD_RW, "Largest span of free chunks a remap segment can span"); -/* BEGIN CSTYLED */ ZFS_MODULE_PARAM(zfs_vdev, zfs_, removal_suspend_progress, UINT, ZMOD_RW, "Pause device removal after this many bytes are copied " "(debug use only - causes removal to hang)"); -/* END CSTYLED */ EXPORT_SYMBOL(free_from_removing_vdev); EXPORT_SYMBOL(spa_removal_get_stats); diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 40e7bcf3ed1f..99fc4ec1928f 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -1706,10 +1706,8 @@ zap_shrink(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx) return (err); } -/* CSTYLED */ ZFS_MODULE_PARAM(zfs, , zap_iterate_prefetch, INT, ZMOD_RW, "When iterating ZAP object, prefetch it"); -/* CSTYLED */ ZFS_MODULE_PARAM(zfs, , zap_shrink_enabled, INT, ZMOD_RW, "Enable ZAP shrinking"); diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index dfe309aa551f..55b60006e58c 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -2030,7 +2030,6 @@ EXPORT_SYMBOL(zap_cursor_serialize); EXPORT_SYMBOL(zap_cursor_init_serialized); EXPORT_SYMBOL(zap_get_stats); -/* CSTYLED */ ZFS_MODULE_PARAM(zfs, , zap_micro_max_size, INT, ZMOD_RW, "Maximum micro ZAP size, before converting to a fat ZAP, in bytes"); #endif diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 8188a9e46865..b1b0ae54460b 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1718,6 +1718,9 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE); } else if (scan_type == POOL_SCAN_NONE) { error = spa_scan_stop(spa); + } else if (scan_cmd == POOL_SCRUB_FROM_LAST_TXG) { + error = spa_scan_range(spa, scan_type, + spa_get_last_scrubbed_txg(spa), 0); } else { error = spa_scan(spa, scan_type); }
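[Editor's note] The setup_sync_arg_t initializers in the rebuild and RAIDZ-expansion completion paths and the new POOL_SCRUB_FROM_LAST_TXG branch above share one convention: a scrub carries a txg window, and 0 appears to mean "unbounded" on that side (inferred from the zeroed initializers and from passing spa_get_last_scrubbed_txg() as the start). A compilable stand-in sketch with stub types, not ZFS code:

#include <stdint.h>

typedef enum { POOL_SCAN_SCRUB = 1 } pool_scan_func_t;

/* Stand-in for the patch's setup_sync_arg_t (fields as added above). */
typedef struct setup_sync_arg {
	pool_scan_func_t func;
	uint64_t txgstart;	/* 0: start from the beginning (assumed) */
	uint64_t txgend;	/* 0: run to the newest txg (assumed) */
} setup_sync_arg_t;

/* A full scrub, as the rebuild/expansion completion paths request it. */
static setup_sync_arg_t
full_scrub(void)
{
	return ((setup_sync_arg_t){
		.func = POOL_SCAN_SCRUB,
		.txgstart = 0,
		.txgend = 0,
	});
}

/* A resumed scrub: only txgs after the last fully scrubbed one. */
static setup_sync_arg_t
resume_scrub(uint64_t last_scrubbed_txg)
{
	return ((setup_sync_arg_t){
		.func = POOL_SCAN_SCRUB,
		.txgstart = last_scrubbed_txg,
		.txgend = 0,
	});
}

diff --git a/scripts/cstyle.pl b/scripts/cstyle.pl index d47fd3362408..123020b08127 100755 --- a/scripts/cstyle.pl +++ b/scripts/cstyle.pl @@ -211,6 +211,7 @@ ($$) my $in_comment = 0; my $comment_done = 0; my $in_warlock_comment = 0; +my $in_macro_call = 0; my $in_function = 0; my $in_function_header = 0; my $function_header_full_indent = 0; @@ -395,12 +396,18 @@ ($$) } } + # If this looks like a top-level macro invocation, remember it so we + # don't mistake it for a function declaration below. + if (/^[A-Za-z_][A-Za-z_0-9]*\(/) { + $in_macro_call = 1; + } + # # If this matches something of form "foo(", it's probably a function # definition, unless it ends with ") bar;", in which case it's a declaration # that uses a macro to generate the type. 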
# - if (/^\w+\(/ && !/\) \w+;/) { + if (!$in_macro_call && /^\w+\(/ && !/\) \w+;/) { $in_function_header = 1; if (/\($/) { $function_header_full_indent = 1; @@ -565,7 +572,9 @@ ($$) err("comma or semicolon followed by non-blank"); } # allow "for" statements to have empty "while" clauses - if (/\s[,;]/ && !/^[\t]+;$/ && !/^\s*for \([^;]*; ;[^;]*\)/) { + # allow macro invocations to have empty parameters + if (/\s[,;]/ && !/^[\t]+;$/ && + !($in_macro_call || /^\s*for \([^;]*; ;[^;]*\)/)) { err("comma or semicolon preceded by blank"); } if (/^\s*(&&|\|\|)/) { @@ -686,10 +695,13 @@ ($$) err("unary * followed by space"); } } - if ($check_posix_types) { + if ($check_posix_types && !$in_macro_call) { # try to detect old non-POSIX types. # POSIX requires all non-standard typedefs to end in _t, # but historically these have been used. + # + # We don't check inside macro invocations because macros have + # legitimate uses for these names in function generators. if (/\b(unchar|ushort|uint|ulong|u_int|u_short|u_long|u_char|quad)\b/) { err("non-POSIX typedef $1 used: use $old2posix{$1} instead"); } @@ -700,6 +712,14 @@ ($$) "else and right brace should be on same line"); } } + + # Macro invocations end with a closing paren, and possibly a semicolon. + # We do this check down here to make sure all the regular checks are + # applied to calls that appear entirely on a single line. + if ($in_macro_call && /\);?$/) { + $in_macro_call = 0; + } + $prev = $line; }
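[Editor's note] To make the cstyle change concrete: both snippets below open with "identifier(" at the start of a top-level line, which is all the old heuristic looked at. Only the second is a function definition; the first is a top-level macro invocation (the tunable name is invented for illustration) that can span lines, carry empty parameters, and legitimately use names like "uint":

/* A top-level macro invocation -- now tracked via $in_macro_call. */
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, example_tunable, UINT, ZMOD_RW,
	"A hypothetical tunable, shown only to illustrate the checker");

/* A function definition -- still caught by the existing check. */
static int
example_function(int x)
{
	return (x + 1);
}

diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index fc4adc42d00a..a69d36df2f98 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -82,7 +82,7 @@ tests = ['block_cloning_clone_mmap_cached', 'block_cloning_copyfilerange_fallback_same_txg', 'block_cloning_replay', 'block_cloning_replay_encrypted', 'block_cloning_lwb_buffer_overflow', 'block_cloning_clone_mmap_write', - 'block_cloning_rlimit_fsize'] + 'block_cloning_rlimit_fsize', 'block_cloning_large_offset'] tags = ['functional', 'block_cloning'] [tests/functional/bootfs] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index 07ec2c4b601b..0bfc64959c38 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -339,6 +339,8 @@ elif sys.platform.startswith('linux'): ['SKIP', cfr_reason], 'block_cloning/block_cloning_rlimit_fsize': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_large_offset': + ['SKIP', cfr_reason], 'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason], 'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason], 'cp_files/cp_files_002_pos': ['SKIP', cfr_reason], diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 7d1551a63f0d..67630cb564ae 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -482,6 +482,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/block_cloning/block_cloning_replay_encrypted.ksh \ functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \ functional/block_cloning/block_cloning_rlimit_fsize.ksh \ + functional/block_cloning/block_cloning_large_offset.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ @@ -1225,6 +1226,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh \ 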
functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh \ + functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_large_offset.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_large_offset.ksh new file mode 100755 index 000000000000..1d5a2619ebf3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_large_offset.ksh @@ -0,0 +1,83 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Verify that cloning a file at a large offset is possible. +# +# STRATEGY: +# 1. Create dataset. +# 2. Populate the source file with 1024 blocks at a 1024-block offset. +# 3. Clone 1024 blocks at a 1024-block offset. +# 4. Compare the cloned file with the original file. +# + +verify_runnable "global" + +if is_linux && [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +claim="The first clone at a large offset is functional" + +log_assert $claim + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL +} + +log_onexit cleanup + +# +# 1. Create dataset. +# +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $DISKS +sync_pool $TESTPOOL + +# +# 2. Populate the source file with 1024 blocks at a 1024-block offset. +# +log_must dd if=/dev/urandom of=/$TESTPOOL/file1 \ + oflag=sync bs=128k count=1024 seek=1024 +sync_pool $TESTPOOL + +# +# 3. Clone 1024 blocks (1024 x 128k = 134217728 bytes) at a 1024-block offset. +# +log_must clonefile -f /$TESTPOOL/file1 /$TESTPOOL/file2 134217728 134217728 \ + 134217728 +sync_pool $TESTPOOL + +# +# 4. Compare the cloned file with the original file. +# +log_must have_same_content /$TESTPOOL/file1 /$TESTPOOL/file2 +typeset blocks=$(get_same_blocks $TESTPOOL file1 $TESTPOOL file2) + +# FreeBSD's seq(1) leaves a trailing space, remove it with sed(1). 
+log_must [ "$blocks" = "$(seq -s " " 0 1023 | sed 's/ $//')" ] + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index e1fe865b1d3b..e5a8b9026e03 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -63,6 +63,7 @@ typeset -a properties=( "bcloneused" "bclonesaved" "bcloneratio" + "last_scrubbed_txg" "feature@async_destroy" "feature@empty_bpobj" "feature@lz4_compress" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh new file mode 100755 index 000000000000..b28a8d2cf72f --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh @@ -0,0 +1,104 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright (c) 2023, Klara Inc. +# +# This software was developed by +# Mariusz Zaborski +# under sponsorship from Wasabi Technology, Inc. and Klara Inc. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib + +# +# DESCRIPTION: +# Verify scrub -C +# +# STRATEGY: +# 1. Create a pool and create one file. +# 2. Verify that the last_txg_scrub is 0. +# 3. Run scrub. +# 4. Verify that the last_txg_scrub is set. +# 5. Create second file. +# 6. Invalidate both files. +# 7. Run scrub only from last point. +# 8. Verify that only one file, that was created with newer txg, +# was detected. +# + +verify_runnable "global" + +function cleanup +{ + log_must zinject -c all + log_must rm -f $mntpnt/f1 + log_must rm -f $mntpnt/f2 +} + +log_onexit cleanup + +log_assert "Verify scrub -C." + +# Create one file. +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +log_must file_write -b 1048576 -c 10 -o create -d 0 -f $mntpnt/f1 +log_must sync_pool $TESTPOOL true +f1txg=$(get_last_txg_synced $TESTPOOL) + +# Verify that last_scrubbed_txg isn't set. +zpoollasttxg=$(zpool get -H -o value last_scrubbed_txg $TESTPOOL) +log_must [ $zpoollasttxg -eq 0 ] + +# Run scrub. +log_must zpool scrub -w $TESTPOOL + +# Verify that last_scrubbed_txg is set. +zpoollasttxg=$(zpool get -H -o value last_scrubbed_txg $TESTPOOL) +log_must [ $zpoollasttxg -ne 0 ] + +# Create second file. +log_must file_write -b 1048576 -c 10 -o create -d 0 -f $mntpnt/f2 +log_must sync_pool $TESTPOOL true +f2txg=$(get_last_txg_synced $TESTPOOL) + +# Make sure that the sync txg are different. +log_must [ $f1txg -ne $f2txg ] + +# Insert faults. 
+log_must zinject -a -t data -e io -T read $mntpnt/f1 +log_must zinject -a -t data -e io -T read $mntpnt/f2 + +# Run scrub from last saved point. +log_must zpool scrub -w -C $TESTPOOL + +# Verify that only the newer file was detected. +log_mustnot eval "zpool status -v $TESTPOOL | grep '$mntpnt/f1'" +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f2'" + +# Verify that a full scrub detects both corrupted files. +log_must zpool scrub -w $TESTPOOL +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f1'" +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f2'" + +log_pass "Verified scrub -C shows expected status." diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh index 4f6e5805bb3a..3b17de5a4073 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_create.ksh @@ -70,7 +70,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "active" # four entries in the unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # single containing object in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | wc -l) -eq 1 @@ -84,7 +84,7 @@ log_must cp /$TESTPOOL/file1 /$TESTPOOL/file2 log_must zpool sync # now four entries in the duplicate table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate:.*entries=4'" # now two DDT ZAPs in the container object; DDT ZAPs aren't cleaned up until # the entire logical table is destroyed diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh index 259eaddc0843..faa9b7e044cd 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_fdt_import.ksh @@ -70,7 +70,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "active" # four entries in the unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # single containing object in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | wc -l) -eq 1 @@ -107,7 +107,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "active" # four entries in the unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # single containing object in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | wc -l) -eq 1 diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh index e3efcf5c8b36..9e524ddbe28e 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_create.ksh @@ -63,7 +63,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "disabled" # should be four entries in the unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # should be just one DDT ZAP in the MOS 
log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 1 @@ -73,7 +73,7 @@ log_must cp /$TESTPOOL/file1 /$TESTPOOL/file2 log_must zpool sync # now four entries in the duplicate table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate:.*entries=4'" # now two DDT ZAPs in the MOS; DDT ZAPs aren't cleaned up until the entire # logical table is destroyed diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh index 114cf0266e12..fd3b01e8cd2c 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_mixed.ksh @@ -71,7 +71,7 @@ log_must dd if=/dev/urandom of=/$TESTPOOL/ds1/file1 bs=128k count=4 log_must zpool sync # should be four entries in the skein unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-skein-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-skein-zap-unique:.*entries=4'" # should be just one DDT ZAP in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-.*-zap- | wc -l) -eq 1 @@ -90,7 +90,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "active" # now also four entries in the blake3 unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-blake3-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-blake3-zap-unique:.*entries=4'" # two entries in the MOS: the legacy skein DDT ZAP, and the containing dir for # the blake3 FDT table diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh index c36463134fde..7a1e8006db16 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh +++ b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_fdt_upgrade.ksh @@ -71,7 +71,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "disabled" # should be four entries in the unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # should be just one DDT ZAP in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 1 @@ -90,7 +90,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "enabled" # now four entries in the duplicate table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-duplicate:.*entries=4'" # now two DDT ZAPs in the MOS; DDT ZAPs aren't cleaned up until the entire # logical table is destroyed @@ -117,7 +117,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "active" # four entries in the unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # single containing object in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256 | wc -l) -eq 1 diff --git a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh index a7b667eaf882..4de46e89fc05 100755 --- a/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh +++ 
b/tests/zfs-tests/tests/functional/dedup/dedup_legacy_import.ksh @@ -63,7 +63,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "disabled" # should be four entries in the unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # should be just one DDT ZAP in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 1 @@ -96,7 +96,7 @@ log_must zpool sync log_must test $(get_pool_prop feature@fast_dedup $TESTPOOL) = "disabled" # should be four entries in the unique table -log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique: 4 entries'" +log_must eval "zdb -D $TESTPOOL | grep -q 'DDT-sha256-zap-unique:.*entries=4'" # should be just one DDT ZAP in the MOS log_must test $(zdb -dddd $TESTPOOL 1 | grep DDT-sha256-zap- | wc -l) -eq 1
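[Editor's note] All of the grep updates across the dedup tests above follow from the zdb -D output rework in this patch: per-object summaries are now printed as key=value fields, so the tests match "entries=N" anywhere on the line instead of the old positional "N entries" phrase. An illustrative line these updated patterns would match (field values invented): DDT-sha256-zap-unique: dspace=69632; mspace=49152; entries=4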