Longer test case working, dedup working
tonyhutter committed Jan 31, 2024
1 parent 6ba5368 commit d2b0cdc
Showing 32 changed files with 435 additions and 165 deletions.
11 changes: 7 additions & 4 deletions cmd/zpool/zpool_vdev.c
@@ -526,15 +526,18 @@ get_replication(nvlist_t *props, nvlist_t *nvroot, boolean_t fatal)
* replication level doesn't matter.
*/
(void) nvlist_lookup_string(nv, ZPOOL_CONFIG_ALLOCATION_BIAS, &str);
if (str && strcmp(str, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
(void) nvlist_lookup_string(props, "feature@special_writethrough", &str);
if (str &&
((strcmp(str, VDEV_ALLOC_BIAS_SPECIAL) == 0) ||
(strcmp(str, VDEV_ALLOC_BIAS_DEDUP) == 0)))
{
(void) nvlist_lookup_string(props, "feature@backup_allocation_classes", &str);
if (!str || (str && !(strcmp(str, "disabled") == 0))) {
/*
* If we're here then the user didn't explicitly
* disable feature@special_writethrough. Thus,
* disable feature@backup_allocation_classes. Thus,
* it is enabled. This is the default case.
*
* Since feature@special_writethrough is enabled
* Since feature@backup_allocation_classes is enabled
* we can skip the redundancy check for the TLD
* since we know the special data is backed up to
* the normal pool.
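Note: with the relaxed check above, a non-redundant special or dedup top-level vdev no longer trips the mismatched-replication error as long as feature@backup_allocation_classes is not explicitly disabled. A hypothetical invocation this hunk is meant to allow (device names are placeholders, not from the commit):

# Mirrored pool plus single-disk special and dedup vdevs; previously this
# redundancy mismatch would normally have required -f.
zpool create tank mirror sda sdb special sdc dedup sdd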
2 changes: 1 addition & 1 deletion include/sys/vdev.h
@@ -69,7 +69,7 @@ extern zio_t *vdev_probe(vdev_t *vd, zio_t *pio);
extern boolean_t vdev_is_concrete(vdev_t *vd);
extern boolean_t vdev_is_bootable(vdev_t *vd);
extern boolean_t vdev_is_alloc_class(vdev_t *vd);
extern boolean_t vdev_is_special_writethrough(spa_t *spa, vdev_t *vd);
extern boolean_t vdev_is_backup_alloc_class(spa_t *spa, vdev_t *vd);
extern vdev_t *vdev_lookup_top(spa_t *spa, uint64_t vdev);
extern vdev_t *vdev_lookup_by_guid(vdev_t *vd, uint64_t guid);
extern int vdev_count_leaves(spa_t *spa);
2 changes: 1 addition & 1 deletion include/zfeature_common.h
@@ -82,7 +82,7 @@ typedef enum spa_feature {
SPA_FEATURE_AVZ_V2,
SPA_FEATURE_REDACTION_LIST_SPILL,
SPA_FEATURE_RAIDZ_EXPANSION,
SPA_FEATURE_SPECIAL_WRITETHROUGH,
SPA_FEATURE_BACKUP_ALLOCATION_CLASSES,
SPA_FEATURES
} spa_feature_t;

10 changes: 5 additions & 5 deletions module/zcommon/zfeature_common.c
@@ -754,15 +754,15 @@ zpool_feature_init(void)
"Support for raidz expansion",
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
{
static const spa_feature_t special_writethrough_deps[] = {
static const spa_feature_t backup_allocation_classes_deps[] = {
SPA_FEATURE_ALLOCATION_CLASSES,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_SPECIAL_WRITETHROUGH,
"org.openzfs:special_writethrough", "special_writethrough",
"Write though special device data to pool",
zfeature_register(SPA_FEATURE_BACKUP_ALLOCATION_CLASSES,
"org.openzfs:backup_allocation_classes", "backup_allocation_classes",
"Automatically backup all allocation class device data to pool",
ZFEATURE_FLAG_MOS | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE,
ZFEATURE_TYPE_BOOLEAN, special_writethrough_deps, sfeatures);
ZFEATURE_TYPE_BOOLEAN, backup_allocation_classes_deps, sfeatures);
}

zfs_mod_list_supported_free(sfeatures);
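Note: once registered, the new feature should surface through the usual feature-flag plumbing. A hypothetical way to inspect it and to opt out at pool creation, using the property name introduced above (pool and device names are placeholders):

# Check the feature's state on an existing pool.
zpool get feature@backup_allocation_classes tank

# Opt out at creation time, as the test hunk at the end of this diff also exercises.
zpool create -o feature@backup_allocation_classes=disabled tank \
    mirror sda sdb special sdc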
12 changes: 6 additions & 6 deletions module/zfs/metaslab.c
@@ -5847,16 +5847,16 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
dva_t *dva = bp->blk_dva;
dva_t *hintdva = (hintbp != NULL) ? hintbp->blk_dva : NULL;
int error = 0;
boolean_t is_special_writethrough = B_FALSE;
boolean_t is_backup_alloc_class = B_FALSE;

if (((mc == spa_special_class(spa)) || (mc == spa_dedup_class(spa))) &&
spa_feature_is_active(spa, SPA_FEATURE_SPECIAL_WRITETHROUGH)) {
is_special_writethrough = B_TRUE;
spa_feature_is_active(spa, SPA_FEATURE_BACKUP_ALLOCATION_CLASSES)) {
is_backup_alloc_class = B_TRUE;
}

ASSERT(bp->blk_birth == 0);
ASSERT(BP_PHYSICAL_BIRTH(bp) == 0);
ASSERT(!(is_special_writethrough && ndvas == 1));
ASSERT(!(is_backup_alloc_class && ndvas == 1));

spa_config_enter(spa, SCL_ALLOC, FTAG, RW_READER);

@@ -5873,10 +5873,10 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,

for (int d = 0; d < ndvas; d++) {
metaslab_class_t *_mc;
if (is_special_writethrough && d == 1) {
if (is_backup_alloc_class && d == 1) {
/*
* If it's a special device with
* SPA_FEATURE_SPECIAL_WRITETHROUGH active, then write the
* SPA_FEATURE_BACKUP_ALLOCATION_CLASSES active, then write the
* first data to the special device, and the 2nd copy
* to the normal pool. That way, if the special device is
* lost, there's still a backup.
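Note: the comment above captures the allocation policy: DVA 0 stays on the special or dedup class, and DVA 1 is redirected to the normal class so the data survives loss of the class vdev. A rough way to eyeball the result on a scratch pool, assuming zdb's object dumps list each DVA (pool, dataset, and block-size values are placeholders):

zpool create tank mirror sda sdb special sdc
zfs create -o special_small_blocks=32K tank/fs
cp -r /etc /tank/fs/
zpool sync tank
# Small blocks routed to the special class should now carry two DVAs,
# one on the special vdev and one on the mirror.
zdb -dddddd tank/fs | grep -m 20 DVA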
8 changes: 4 additions & 4 deletions module/zfs/spa.c
@@ -2508,7 +2508,7 @@ spa_check_removed(vdev_t *vd)
* If we have missing top-level vdevs, and they are all special
* devices, then we may still be able to import the pool.
*
* Check if the SPA_FEATURE_SPECIAL_WRITETHROUGH feature flag is set.
* Check if the SPA_FEATURE_BACKUP_ALLOCATION_CLASSES feature flag is set.
* If it is set, then we know the special device's data is backed up to
* the regular pool (via ditto blocks), and thus we can import the pool
* without the special devices.
Expand All @@ -2524,7 +2524,7 @@ spa_check_for_bad_special_devices(spa_t *spa)
* SPA_FEATURE_SPECIAL_WRITETHROUGH is not enabled? If so, then we
* can't import.
*/
if (!spa_feature_is_active(spa, SPA_FEATURE_SPECIAL_WRITETHROUGH)) {
if (!spa_feature_is_active(spa, SPA_FEATURE_BACKUP_ALLOCATION_CLASSES)) {
spa_load_note(spa, "some special devices are missing, cannot "
"import.");
return (SET_ERROR(ENXIO));
@@ -3987,11 +3987,11 @@ spa_ld_open_vdevs(spa_t *spa)
/*
* Special case: If all the missing top-level vdevs are special
* devices, we may or may not be able to import the pool,
* depending on whether SPA_FEATURE_SPECIAL_WRITETHROUGH is
* depending on whether SPA_FEATURE_BACKUP_ALLOCATION_CLASSES is
* enabled or not. At this early stage of import we do not
* have the feature flags loaded yet, so for now proceed
* with the import. We will do the
* SPA_FEATURE_SPECIAL_WRITETHROUGH check later after the
* SPA_FEATURE_BACKUP_ALLOCATION_CLASSES check later after the
* feature flags are loaded.
*/
spa_load_note(spa, "vdev tree has %lld missing special "
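Note: the import-time logic above is what makes losing every special top-level vdev survivable. A rough file-backed reproduction, assuming the feature was active when the pool was created (paths and pool name are placeholders):

truncate -s 1G /var/tmp/d0 /var/tmp/d1 /var/tmp/special0
zpool create tank mirror /var/tmp/d0 /var/tmp/d1 special /var/tmp/special0
zpool export tank
rm /var/tmp/special0
# Expected to succeed (with the special vdev reported missing) because the
# special-class data was also written through to the normal class.
zpool import -d /var/tmp tank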
7 changes: 7 additions & 0 deletions module/zfs/spa_misc.c
@@ -1645,6 +1645,13 @@ spa_activate_allocation_classes(spa_t *spa, dmu_tx_t *tx)
*/
ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_ALLOCATION_CLASSES));
spa_feature_incr(spa, SPA_FEATURE_ALLOCATION_CLASSES, tx);

/*
* If we have the allocation classes backup feature enabled then
* make it active here as well.
*/
if (spa_feature_is_enabled(spa, SPA_FEATURE_BACKUP_ALLOCATION_CLASSES))
spa_feature_incr(spa, SPA_FEATURE_BACKUP_ALLOCATION_CLASSES, tx);
}

/*
8 changes: 4 additions & 4 deletions module/zfs/vdev.c
@@ -5239,7 +5239,7 @@ vdev_propagate_state(vdev_t *vd)
* degraded.
*/
if ((child->vdev_islog ||
vdev_is_special_writethrough(spa, child) ||
vdev_is_backup_alloc_class(spa, child) ||
(vdev_is_alloc_class(child) && !spa->spa_feat_is_loaded)) && vd == rvd)
degraded++;
else
@@ -5493,15 +5493,15 @@ vdev_is_alloc_class(vdev_t *vd)
}

/*
* Is the vdev a special device and has SPA_FEATURE_SPECIAL_WRITETHROUGH
* Is the vdev a special device and has SPA_FEATURE_BACKUP_ALLOCATION_CLASSES
* enabled?
*
* This function works for both top-level vdevs and leaf vdevs.
*/
boolean_t
vdev_is_special_writethrough(spa_t *spa, vdev_t *vd)
vdev_is_backup_alloc_class(spa_t *spa, vdev_t *vd)
{
if (!spa_feature_is_active(spa, SPA_FEATURE_SPECIAL_WRITETHROUGH)) {
if (!spa_feature_is_active(spa, SPA_FEATURE_BACKUP_ALLOCATION_CLASSES)) {
vdev_dbgmsg(vd, "feature flag not enabled, loaded %d", spa->spa_feat_is_loaded);
return (B_FALSE);
} else {
53 changes: 47 additions & 6 deletions module/zfs/vdev_label.c
@@ -1708,6 +1708,7 @@ static void
vdev_uberblock_sync_done(zio_t *zio)
{
uint64_t *good_writes = zio->io_private;
vdev_dbgmsg(zio->io_vd, "done error %d ms_array %lu", zio->io_error, (long unsigned) zio->io_vd->vdev_top->vdev_ms_array);

if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0)
atomic_inc_64(good_writes);
@@ -1720,6 +1721,7 @@ static void
vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes,
uberblock_t *ub, vdev_t *vd, int flags)
{
vdev_dbgmsg(vd, "begin");
for (uint64_t c = 0; c < vd->vdev_children; c++) {
vdev_uberblock_sync(zio, good_writes,
ub, vd->vdev_child[c], flags);
@@ -1740,6 +1742,7 @@ vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes,
if (vd->vdev_ops == &vdev_draid_spare_ops)
return;

vdev_dbgmsg(vd, "looking at state");
/* If the vdev was expanded, need to copy uberblock rings. */
if (vd->vdev_state == VDEV_STATE_HEALTHY &&
vd->vdev_copy_uberblocks == B_TRUE) {
@@ -1771,12 +1774,14 @@ vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes,
abd_zero_off(ub_abd, sizeof (uberblock_t),
VDEV_UBERBLOCK_SIZE(vd) - sizeof (uberblock_t));

for (int l = 0; l < VDEV_LABELS; l++)
vdev_dbgmsg(vd, "about to write labels");
for (int l = 0; l < VDEV_LABELS; l++) {
vdev_dbgmsg(vd, "writing label %d", l);
vdev_label_write(zio, vd, l, ub_abd,
VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
vdev_uberblock_sync_done, good_writes,
flags | ZIO_FLAG_DONT_PROPAGATE);

}
abd_free(ub_abd);
}

@@ -1787,11 +1792,16 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)
spa_t *spa = svd[0]->vdev_spa;
zio_t *zio;
uint64_t good_writes = 0;
boolean_t all_failures_are_alloc_class = B_FALSE;
int rc;

zio = zio_root(spa, NULL, NULL, flags);

for (int v = 0; v < svdcount; v++)
for (int v = 0; v < svdcount; v++) {
vdev_dbgmsg(svd[v], "%d doing sync list", v);
vdev_uberblock_sync(zio, &good_writes, ub, svd[v], flags);
vdev_dbgmsg(svd[v], "%d done, good_writes= %lu", v, (unsigned long) good_writes);
}

(void) zio_wait(zio);

@@ -1810,7 +1820,38 @@ vdev_uberblock_sync_list(vdev_t **svd, int svdcount, uberblock_t *ub, int flags)

(void) zio_wait(zio);

return (good_writes >= 1 ? 0 : EIO);
/*
* Special case:
*
* If we had zero good writes, but all the writes were to special
* allocation class disks, and BACKUP_ALLOCATION_CLASSES was enabled,
* then it's not fatal.
*/
if (good_writes == 0) {
all_failures_are_alloc_class = B_TRUE;
for (int v = 0; v < svdcount; v++) {
if (!vdev_is_backup_alloc_class(spa, svd[v])) {
all_failures_are_alloc_class = B_FALSE;
break;
}
}
}

if (good_writes >= 1) {
/* success */
rc = 0;
} else if (all_failures_are_alloc_class) {
/*
* All the failures are special allocation classes disks. This
* isn't fatal if BACKUP_ALLOCATION_CLASSES is enabled.
*/
rc = 0;
} else {
/* failure */
rc = EIO;
}

return (rc);
}

/*
Expand Down Expand Up @@ -1924,12 +1965,12 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)

ASSERT(!vd->vdev_ishole);

vdev_dbgmsg(vd, "vdev_is_special_writethrough = %d, guid %lu", vdev_is_special_writethrough(spa, vd), (unsigned long) vd->vdev_guid);
vdev_dbgmsg(vd, "vdev_is_backup_alloc_class = %d, guid %lu", vdev_is_backup_alloc_class(spa, vd), (unsigned long) vd->vdev_guid);

good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
zio_t *vio = zio_null(zio, spa, NULL,
(vd->vdev_islog || vd->vdev_aux != NULL ||
vdev_is_special_writethrough(spa, vd)) ?
vdev_is_backup_alloc_class(spa, vd)) ?
vdev_label_sync_ignore_done : vdev_label_sync_top_done,
good_writes, flags);
vdev_label_sync(vio, good_writes, vd, l, txg, flags);
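Note: vdev_uberblock_sync_list() above now returns success when the only uberblock-write failures land on backup-protected allocation class vdevs, so a dying special disk no longer fails the whole txg sync. One way this path might be exercised, assuming zinject's device-level write-error injection also reaches label and uberblock I/O (pool and device names are placeholders):

zinject -d /var/tmp/special0 -e io -T write tank
zpool sync tank
zpool status -v tank    # pool should stay online rather than suspending
zinject -c all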
2 changes: 1 addition & 1 deletion module/zfs/vdev_root.c
@@ -46,7 +46,7 @@ vdev_root_core_tvds(vdev_t *vd)
vdev_t *cvd = vd->vdev_child[c];

if (!cvd->vdev_ishole && !cvd->vdev_islog &&
!vdev_is_special_writethrough(vd->vdev_spa, vd) &&
!vdev_is_backup_alloc_class(vd->vdev_spa, vd) &&
cvd->vdev_ops != &vdev_indirect_ops) {
tvds++;
}
23 changes: 18 additions & 5 deletions module/zfs/zio.c
@@ -3504,6 +3504,19 @@ zio_ddt_write(zio_t *zio)
ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override);
ASSERT(!(zio->io_bp_override && (zio->io_flags & ZIO_FLAG_RAW)));

/*
* If SPA_FEATURE_BACKUP_ALLOCATION_CLASSES is enabled, then we save
* a copy of all allocation classes writes to the normal pool. Since
* dedup is an allocation class, we need to adjust its copies here
* so the later ZIO stages work correctly.
*/
if (spa_feature_is_active(spa, SPA_FEATURE_BACKUP_ALLOCATION_CLASSES) &&
zp->zp_copies == 1) {
zp->zp_copies = 2;
}

p = zp->zp_copies;

ddt_enter(ddt);
dde = ddt_lookup(ddt, bp, B_TRUE);
ddp = &dde->dde_phys[p];
@@ -3635,18 +3648,18 @@ zio_dva_throttle(zio_t *zio)
zio->io_prop.zp_level, zio->io_prop.zp_zpl_smallblk);

/*
* If special writethough is enabled, we will do the regular
* write to the special device and an additional "backup"
* write to the normal pool. That way if the special devices
* If backup alloc classes is enabled, we will do the regular
* write to the special/dedup device and an additional "backup"
* write to the normal pool. That way if the special/dedup devices
* all fail, we don't lose all data in our pool.
*
* Reserve that 2nd write to the regular pool here. The DVAs
* for both writes will later be allocated in the
* next step in the ZIO pipeline in
* zio_dva_allocate()->metaslab_alloc().
*/
if (mc == spa_special_class(spa) &&
spa_feature_is_active(spa, SPA_FEATURE_SPECIAL_WRITETHROUGH) &&
if ((mc == spa_special_class(spa) || mc == spa_dedup_class(spa)) &&
spa_feature_is_active(spa, SPA_FEATURE_BACKUP_ALLOCATION_CLASSES) &&
zio->io_prop.zp_copies == 1) {
zio->io_prop.zp_copies = 2;
}
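Note: both hunks above bump zp_copies from 1 to 2 when the feature is active, so the DVA-allocation stage (metaslab_alloc() earlier in this diff) hands out the extra backup copy in the normal class; the zio_ddt_write() change presumably covers deduplicated writes that do not take the zio_dva_throttle() path. A hypothetical layout that pushes writes through the dedup code path (placeholder names):

zpool create tank mirror sda sdb special sdc dedup sdd
zfs create -o dedup=on tank/fs
# Write duplicate data so DDT entries are created and dedup writes actually occur.
dd if=/dev/urandom of=/tank/fs/a bs=1M count=64
cp /tank/fs/a /tank/fs/b
zpool sync tank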
2 changes: 1 addition & 1 deletion tests/runfiles/common.run
@@ -38,7 +38,7 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos',
'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos',
'alloc_class_013_pos', 'alloc_class_014_neg', 'alloc_class_015_pos',
'special_writethrough']
'backup_alloc_class']
tags = ['functional', 'alloc_class']

[tests/functional/append]
1 change: 1 addition & 0 deletions tests/test-runner/include/logapi.shlib
@@ -465,6 +465,7 @@ function _endlog
function _printline
{
echo "$@"
echo "$@" >> /tmp/log
}

# Output an error message
2 changes: 1 addition & 1 deletion tests/zfs-tests/tests/Makefile.am
@@ -421,7 +421,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/alloc_class/alloc_class_013_pos.ksh \
functional/alloc_class/alloc_class_014_neg.ksh \
functional/alloc_class/alloc_class_015_pos.ksh \
functional/alloc_class/special_writethrough.ksh \
functional/alloc_class/backup_alloc_class.ksh \
functional/alloc_class/cleanup.ksh \
functional/alloc_class/setup.ksh \
functional/append/file_append.ksh \
@@ -27,6 +27,11 @@ function disk_cleanup
{
rm -f $ZPOOL_DEVSIZE $ZPOOL_DISKS 2> /dev/null
rm -f $CLASS_DEVSIZE $CLASS_DISKS 2> /dev/null

# Delete the compressed backup copies used by 'backup_alloc_class.ksh'
for disk in $CLASS_DISKS ; do
rm -f ${disk}.gz
done
}

function cleanup
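Note: the cleanup hunk above removes gzipped copies of the class disks, which suggests backup_alloc_class.ksh saves compressed images of the special/dedup disk files so it can destroy and later restore them mid-test; presumably something along these lines (a sketch, not the actual test code):

# Save a compressed image of each allocation class disk ...
for disk in $CLASS_DISKS ; do
	gzip -c $disk > ${disk}.gz
done

# ... and restore it after the disk has been corrupted or removed.
for disk in $CLASS_DISKS ; do
	gunzip -c ${disk}.gz > $disk
done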
@@ -33,7 +33,7 @@ log_onexit cleanup

log_must disk_setup

for arg in '-o feature@special_writethrough=disabled' '' ; do
for arg in '-o feature@backup_allocation_classes=disabled' '' ; do
for type in special dedup; do
log_mustnot zpool create $arg -d $TESTPOOL $CLASS_DISK0 $type \
$CLASS_DISK1