From e5635df8524b19c971d018164c49960dd34f4cd6 Mon Sep 17 00:00:00 2001 From: Mariusz Zaborski Date: Wed, 30 Aug 2023 14:22:09 +0000 Subject: [PATCH] Add last_scrubbed_txg property and option to scrub from last saved txg The `last_scrubbed_txg` property indicates the transaction group (TXG) up to which the most recent scrub operation has checked and repaired the pool. This provides administrators with insight into the data integrity status of their pool at a specific point in time. The new `zpool scrub -C` option starts a scrub from that saved TXG instead of scrubbing the whole pool again. Sponsored-By: Wasabi Technology, Inc. Sponsored-By: Klara Inc. Signed-off-by: Mariusz Zaborski --- cmd/zpool/zpool_main.c | 26 ++++- include/sys/dmu.h | 1 + include/sys/fs/zfs.h | 2 + include/sys/spa.h | 1 + include/sys/spa_impl.h | 1 + lib/libzfs/libzfs.abi | 3 +- man/man7/zpoolprops.7 | 10 +- man/man8/zpool-scrub.8 | 7 +- module/zcommon/zpool_prop.c | 3 + module/zfs/dsl_scan.c | 18 ++- module/zfs/spa.c | 9 +- module/zfs/spa_misc.c | 6 + module/zfs/zfs_ioctl.c | 3 + tests/zfs-tests/tests/Makefile.am | 1 + .../cli_root/zpool_get/zpool_get.cfg | 1 + .../zpool_scrub_txg_continue_from_last.ksh | 104 ++++++++++++++++++ 16 files changed, 185 insertions(+), 11 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index 4458b902de31..6aef7bdb3ebe 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -512,7 +512,8 @@ get_usage(zpool_help_t idx) return (gettext("\tinitialize [-c | -s | -u] [-w] " "[ ...]\n")); case HELP_SCRUB: - return (gettext("\tscrub [-s | -p] [-w] [-e] ...\n")); + return (gettext("\tscrub [-e | -s | -p | -C] [-w] " + " ...\n")); case HELP_RESILVER: return (gettext("\tresilver ...\n")); case HELP_TRIM: @@ -8429,12 +8430,13 @@ wait_callback(zpool_handle_t *zhp, void *data) } /* - * zpool scrub [-s | -p] [-w] [-e] ... + * zpool scrub [-e | -s | -p | -C] [-w] ... * * -e Only scrub blocks in the error log. * -s Stop. 
Stops any in-progress scrub. * -p Pause. Pause in-progress scrub. * -w Wait. Blocks until scrub has completed. + * -C Scrub from last saved txg. */ int zpool_do_scrub(int argc, char **argv) @@ -8450,9 +8452,10 @@ zpool_do_scrub(int argc, char **argv) boolean_t is_error_scrub = B_FALSE; boolean_t is_pause = B_FALSE; boolean_t is_stop = B_FALSE; + boolean_t is_txg_continue = B_FALSE; /* check options */ - while ((c = getopt(argc, argv, "spwe")) != -1) { + while ((c = getopt(argc, argv, "spweC")) != -1) { switch (c) { case 'e': is_error_scrub = B_TRUE; @@ -8466,6 +8469,9 @@ zpool_do_scrub(int argc, char **argv) case 'w': wait = B_TRUE; break; + case 'C': + is_txg_continue = B_TRUE; + break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); @@ -8477,6 +8483,18 @@ zpool_do_scrub(int argc, char **argv) (void) fprintf(stderr, gettext("invalid option " "combination :-s and -p are mutually exclusive\n")); usage(B_FALSE); + } else if (is_pause && is_txg_continue) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-p and -C are mutually exclusive\n")); + usage(B_FALSE); + } else if (is_stop && is_txg_continue) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-s and -C are mutually exclusive\n")); + usage(B_FALSE); + } else if (is_error_scrub && is_txg_continue) { + (void) fprintf(stderr, gettext("invalid option " + "combination :-e and -C are mutually exclusive\n")); + usage(B_FALSE); } else { if (is_error_scrub) cb.cb_type = POOL_SCAN_ERRORSCRUB; @@ -8485,6 +8503,8 @@ zpool_do_scrub(int argc, char **argv) cb.cb_scrub_cmd = POOL_SCRUB_PAUSE; } else if (is_stop) { cb.cb_type = POOL_SCAN_NONE; + } else if (is_txg_continue) { + cb.cb_scrub_cmd = POOL_SCRUB_FROM_LAST_TXG; } else { cb.cb_scrub_cmd = POOL_SCRUB_NORMAL; } diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 22cbd7fc73b6..29f715039d29 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -381,6 +381,7 @@ typedef struct dmu_buf { #define 
DMU_POOL_CREATION_VERSION "creation_version" #define DMU_POOL_SCAN "scan" #define DMU_POOL_ERRORSCRUB "error_scrub" +#define DMU_POOL_LAST_SCRUBBED_TXG "last_scrubbed_txg" #define DMU_POOL_FREE_BPOBJ "free_bpobj" #define DMU_POOL_BPTREE_OBJ "bptree_obj" #define DMU_POOL_EMPTY_BPOBJ "empty_bpobj" diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 1676020d04d3..dc474e3739f3 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -265,6 +265,7 @@ typedef enum { ZPOOL_PROP_DEDUP_TABLE_SIZE, ZPOOL_PROP_DEDUP_TABLE_QUOTA, ZPOOL_PROP_DEDUPCACHED, + ZPOOL_PROP_LAST_SCRUBBED_TXG, ZPOOL_NUM_PROPS } zpool_prop_t; @@ -1088,6 +1089,7 @@ typedef enum pool_scan_func { typedef enum pool_scrub_cmd { POOL_SCRUB_NORMAL = 0, POOL_SCRUB_PAUSE, + POOL_SCRUB_FROM_LAST_TXG, POOL_SCRUB_FLAGS_END } pool_scrub_cmd_t; diff --git a/include/sys/spa.h b/include/sys/spa.h index 01070c09e6e3..5161e4996b1c 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1081,6 +1081,7 @@ extern uint64_t spa_get_deadman_failmode(spa_t *spa); extern void spa_set_deadman_failmode(spa_t *spa, const char *failmode); extern boolean_t spa_suspended(spa_t *spa); extern uint64_t spa_bootfs(spa_t *spa); +extern uint64_t spa_get_last_scrubbed_txg(spa_t *spa); extern uint64_t spa_delegation(spa_t *spa); extern objset_t *spa_meta_objset(spa_t *spa); extern space_map_t *spa_syncing_log_sm(spa_t *spa); diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h index 7811abbb9ce3..84e4bc29a658 100644 --- a/include/sys/spa_impl.h +++ b/include/sys/spa_impl.h @@ -318,6 +318,7 @@ struct spa { uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */ uint64_t spa_scan_pass_exam; /* examined bytes per pass */ uint64_t spa_scan_pass_issued; /* issued bytes per pass */ + uint64_t spa_scrubbed_last_txg; /* last txg scrubbed */ /* error scrub pause time in milliseconds */ uint64_t spa_scan_pass_errorscrub_pause; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index ac9ae233c72d..2d6fc29f2a24 
100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -5984,7 +5984,8 @@ - + + diff --git a/man/man7/zpoolprops.7 b/man/man7/zpoolprops.7 index f4fcc620e4d9..85249121b72f 100644 --- a/man/man7/zpoolprops.7 +++ b/man/man7/zpoolprops.7 @@ -28,7 +28,7 @@ .\" Copyright (c) 2021, Colm Buckley .\" Copyright (c) 2023, Klara Inc. .\" -.Dd July 29, 2024 +.Dd June 20, 2024 .Dt ZPOOLPROPS 7 .Os . @@ -135,6 +135,14 @@ A unique identifier for the pool. The current health of the pool. Health can be one of .Sy ONLINE , DEGRADED , FAULTED , OFFLINE, REMOVED , UNAVAIL . +.It Sy last_scrubbed_txg +Indicates the transaction group (TXG) up to which the most recent scrub +operation has checked and repaired the pool. +This provides insight into the data integrity status of the pool at +a specific point in time. +See +.Xr zpool-scrub 8 +for how to start a scrub from this TXG. .It Sy leaked Space not released while .Sy freeing diff --git a/man/man8/zpool-scrub.8 b/man/man8/zpool-scrub.8 index 03f3ad4991f9..761a51e75aa4 100644 --- a/man/man8/zpool-scrub.8 +++ b/man/man8/zpool-scrub.8 @@ -36,9 +36,8 @@ .Sh SYNOPSIS .Nm zpool .Cm scrub -.Op Fl s Ns | Ns Fl p +.Op Ns Fl e | Ns Fl p | Fl s Ns | Fl C Ns .Op Fl w -.Op Fl e .Ar pool Ns … . .Sh DESCRIPTION @@ -114,6 +113,10 @@ The pool must have been scrubbed at least once with the feature enabled to use this option. Error scrubbing cannot be run simultaneously with regular scrubbing or resilvering, nor can it be run when a regular scrub is paused. +.It Fl C +Continue scrub from last saved txg (see zpool +.Sy last_scrubbed_txg +property). 
.El .Sh EXAMPLES .Ss Example 1 diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c index d3355730ba3d..a709679b9032 100644 --- a/module/zcommon/zpool_prop.c +++ b/module/zcommon/zpool_prop.c @@ -128,6 +128,9 @@ zpool_prop_init(void) zprop_register_number(ZPOOL_PROP_DEDUP_TABLE_SIZE, "dedup_table_size", 0, PROP_READONLY, ZFS_TYPE_POOL, "", "DDTSIZE", B_FALSE, sfeatures); + zprop_register_number(ZPOOL_PROP_LAST_SCRUBBED_TXG, + "last_scrubbed_txg", 0, PROP_READONLY, ZFS_TYPE_POOL, "", + "LAST_SCRUBBED_TXG", B_FALSE, sfeatures); /* default number properties */ zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION, diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 2c4a1a535515..60768e72799d 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -231,6 +231,9 @@ static uint_t zfs_resilver_defer_percent = 10; ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \ (scn)->scn_phys.scn_func == POOL_SCAN_RESILVER) +#define DSL_SCAN_IS_SCRUB(scn) \ + ((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB) + /* * Enable/disable the processing of the free_bpobj object. 
*/ @@ -1137,15 +1140,24 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) spa_notify_waiters(spa); - if (dsl_scan_restarting(scn, tx)) + if (dsl_scan_restarting(scn, tx)) { spa_history_log_internal(spa, "scan aborted, restarting", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); - else if (!complete) + } else if (!complete) { spa_history_log_internal(spa, "scan cancelled", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); - else + } else { spa_history_log_internal(spa, "scan done", tx, "errors=%llu", (u_longlong_t)spa_approx_errlog_size(spa)); + if (DSL_SCAN_IS_SCRUB(scn)) { + VERIFY0(zap_update(dp->dp_meta_objset, + DMU_POOL_DIRECTORY_OBJECT, + DMU_POOL_LAST_SCRUBBED_TXG, + sizeof (uint64_t), 1, + &scn->scn_phys.scn_max_txg, tx)); + spa->spa_scrubbed_last_txg = scn->scn_phys.scn_max_txg; + } + } if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { spa->spa_scrub_active = B_FALSE; diff --git a/module/zfs/spa.c b/module/zfs/spa.c index ff32e678a03e..707c144c8639 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -451,9 +451,10 @@ spa_prop_get_config(spa_t *spa, nvlist_t *nv) spa_prop_add_list(nv, ZPOOL_PROP_DEDUP_TABLE_SIZE, NULL, ddt_get_ddt_dsize(spa), src); - spa_prop_add_list(nv, ZPOOL_PROP_HEALTH, NULL, rvd->vdev_state, src); + spa_prop_add_list(nv, ZPOOL_PROP_LAST_SCRUBBED_TXG, NULL, + spa_get_last_scrubbed_txg(spa), src); version = spa_version(spa); if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) { @@ -4726,6 +4727,12 @@ spa_ld_get_props(spa_t *spa) if (error != 0 && error != ENOENT) return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* Load the last scrubbed txg. */ + error = spa_dir_prop(spa, DMU_POOL_LAST_SCRUBBED_TXG, + &spa->spa_scrubbed_last_txg, B_FALSE); + if (error != 0 && error != ENOENT) + return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO)); + /* * Load the livelist deletion field. 
If a livelist is queued for * deletion, indicate that in the spa diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index f486513fcaf9..5b1f44a5c1c2 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -2681,6 +2681,12 @@ spa_mode(spa_t *spa) return (spa->spa_mode); } +uint64_t +spa_get_last_scrubbed_txg(spa_t *spa) +{ + return (spa->spa_scrubbed_last_txg); +} + uint64_t spa_bootfs(spa_t *spa) { diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 8188a9e46865..b1b0ae54460b 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1718,6 +1718,9 @@ zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE); } else if (scan_type == POOL_SCAN_NONE) { error = spa_scan_stop(spa); + } else if (scan_cmd == POOL_SCRUB_FROM_LAST_TXG) { + error = spa_scan_range(spa, scan_type, + spa_get_last_scrubbed_txg(spa), 0); } else { error = spa_scan(spa, scan_type); } diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 7d1551a63f0d..03d797cc4f83 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1225,6 +1225,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_scrub/zpool_scrub_multiple_copies.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh \ functional/cli_root/zpool_scrub/zpool_scrub_print_repairing.ksh \ + functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_001_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_002_pos.ksh \ functional/cli_root/zpool_scrub/zpool_error_scrub_003_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg index e1fe865b1d3b..e5a8b9026e03 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg +++ 
b/tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg @@ -63,6 +63,7 @@ typeset -a properties=( "bcloneused" "bclonesaved" "bcloneratio" + "last_scrubbed_txg" "feature@async_destroy" "feature@empty_bpobj" "feature@lz4_compress" diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh new file mode 100755 index 000000000000..b28a8d2cf72f --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_txg_continue_from_last.ksh @@ -0,0 +1,104 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# Copyright (c) 2023, Klara Inc. +# +# This software was developed by +# Mariusz Zaborski +# under sponsorship from Wasabi Technology, Inc. and Klara Inc. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib + +# +# DESCRIPTION: +# Verify scrub -C +# +# STRATEGY: +# 1. Create a pool and create one file. +# 2. Verify that the last_scrubbed_txg is 0. +# 3. Run scrub. +# 4. 
Verify that the last_scrubbed_txg is set. +# 5. Create second file. +# 6. Inject read faults into both files. +# 7. Run scrub only from the last scrubbed txg. +# 8. Verify that only the file created in the newer txg +# was detected. +# + +verify_runnable "global" + +function cleanup +{ + log_must zinject -c all + log_must rm -f $mntpnt/f1 + log_must rm -f $mntpnt/f2 +} + +log_onexit cleanup + +log_assert "Verify scrub -C." + +# Create one file. +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS) + +log_must file_write -b 1048576 -c 10 -o create -d 0 -f $mntpnt/f1 +log_must sync_pool $TESTPOOL true +f1txg=$(get_last_txg_synced $TESTPOOL) + +# Verify that last_scrubbed_txg isn't set. +zpoollasttxg=$(zpool get -H -o value last_scrubbed_txg $TESTPOOL) +log_must [ $zpoollasttxg -eq 0 ] + +# Run scrub. +log_must zpool scrub -w $TESTPOOL + +# Verify that last_scrubbed_txg is set. +zpoollasttxg=$(zpool get -H -o value last_scrubbed_txg $TESTPOOL) +log_must [ $zpoollasttxg -ne 0 ] + +# Create second file. +log_must file_write -b 1048576 -c 10 -o create -d 0 -f $mntpnt/f2 +log_must sync_pool $TESTPOOL true +f2txg=$(get_last_txg_synced $TESTPOOL) + +# Make sure that the synced txgs are different. +log_must [ $f1txg -ne $f2txg ] + +# Insert faults. +log_must zinject -a -t data -e io -T read $mntpnt/f1 +log_must zinject -a -t data -e io -T read $mntpnt/f2 + +# Run scrub from last saved point. +log_must zpool scrub -w -C $TESTPOOL + +# Verify that only the newer file was detected. +log_mustnot eval "zpool status -v $TESTPOOL | grep '$mntpnt/f1'" +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f2'" + +# Verify that a full scrub detects both files. +log_must zpool scrub -w $TESTPOOL +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f1'" +log_must eval "zpool status -v $TESTPOOL | grep '$mntpnt/f2'" + +log_pass "Verified scrub -C show expected status."