Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ZFS send should use spill block prefetched from send_reader_thread #16701

Merged
merged 1 commit into from
Nov 6, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 64 additions & 62 deletions module/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ struct send_range {
*/
dnode_phys_t *dnp;
blkptr_t bp;
/* Piggyback unmodified spill block */
struct send_range *spill_range;
} object;
struct srr {
uint32_t datablksz;
Expand Down Expand Up @@ -231,6 +233,8 @@ range_free(struct send_range *range)
size_t size = sizeof (dnode_phys_t) *
(range->sru.object.dnp->dn_extra_slots + 1);
kmem_free(range->sru.object.dnp, size);
if (range->sru.object.spill_range)
range_free(range->sru.object.spill_range);
} else if (range->type == DATA) {
mutex_enter(&range->sru.data.lock);
while (range->sru.data.io_outstanding)
Expand Down Expand Up @@ -617,7 +621,7 @@ dump_spill(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object,
drrs->drr_length = blksz;
drrs->drr_toguid = dscp->dsc_toguid;

/* See comment in dump_dnode() for full details */
/* See comment in piggyback_unmodified_spill() for full details */
if (zfs_send_unmodified_spill_blocks &&
(BP_GET_LOGICAL_BIRTH(bp) <= dscp->dsc_fromtxg)) {
drrs->drr_flags |= DRR_SPILL_UNMODIFIED;
Expand Down Expand Up @@ -793,35 +797,6 @@ dump_dnode(dmu_send_cookie_t *dscp, const blkptr_t *bp, uint64_t object,
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
return (SET_ERROR(EINTR));

/*
* Send DRR_SPILL records for unmodified spill blocks. This is useful
* because changing certain attributes of the object (e.g. blocksize)
* can cause old versions of ZFS to incorrectly remove a spill block.
* Including these records in the stream forces an up to date version
* to always be written ensuring they're never lost. Current versions
* of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
* ignore these unmodified spill blocks.
*/
if (zfs_send_unmodified_spill_blocks &&
(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
(BP_GET_LOGICAL_BIRTH(DN_SPILL_BLKPTR(dnp)) <= dscp->dsc_fromtxg)) {
struct send_range record;
blkptr_t *bp = DN_SPILL_BLKPTR(dnp);

memset(&record, 0, sizeof (struct send_range));
record.type = DATA;
record.object = object;
record.eos_marker = B_FALSE;
record.start_blkid = DMU_SPILL_BLKID;
record.end_blkid = record.start_blkid + 1;
record.sru.data.bp = *bp;
record.sru.data.obj_type = dnp->dn_type;
record.sru.data.datablksz = BP_GET_LSIZE(bp);

if (do_dump(dscp, &record) != 0)
return (SET_ERROR(EINTR));
}

if (dscp->dsc_err != 0)
return (SET_ERROR(EINTR));

Expand Down Expand Up @@ -911,6 +886,9 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
case OBJECT:
err = dump_dnode(dscp, &range->sru.object.bp, range->object,
range->sru.object.dnp);
/* Dump piggybacked unmodified spill block */
if (!err && range->sru.object.spill_range)
err = do_dump(dscp, range->sru.object.spill_range);
return (err);
case OBJECT_RANGE: {
ASSERT3U(range->start_blkid + 1, ==, range->end_blkid);
Expand Down Expand Up @@ -939,34 +917,7 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)

ASSERT3U(srdp->datablksz, ==, BP_GET_LSIZE(bp));
ASSERT3U(range->start_blkid + 1, ==, range->end_blkid);
if (BP_GET_TYPE(bp) == DMU_OT_SA) {
arc_flags_t aflags = ARC_FLAG_WAIT;
zio_flag_t zioflags = ZIO_FLAG_CANFAIL;

if (dscp->dsc_featureflags & DMU_BACKUP_FEATURE_RAW) {
ASSERT(BP_IS_PROTECTED(bp));
zioflags |= ZIO_FLAG_RAW;
}

zbookmark_phys_t zb;
ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID);
zb.zb_objset = dmu_objset_id(dscp->dsc_os);
zb.zb_object = range->object;
zb.zb_level = 0;
zb.zb_blkid = range->start_blkid;

arc_buf_t *abuf = NULL;
if (!dscp->dsc_dso->dso_dryrun && arc_read(NULL, spa,
bp, arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
zioflags, &aflags, &zb) != 0)
return (SET_ERROR(EIO));

err = dump_spill(dscp, bp, zb.zb_object,
(abuf == NULL ? NULL : abuf->b_data));
if (abuf != NULL)
arc_buf_destroy(abuf, &abuf);
return (err);
}
if (send_do_embed(bp, dscp->dsc_featureflags)) {
err = dump_write_embedded(dscp, range->object,
range->start_blkid * srdp->datablksz,
Expand All @@ -975,8 +926,9 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
}
ASSERT(range->object > dscp->dsc_resume_object ||
(range->object == dscp->dsc_resume_object &&
(range->start_blkid == DMU_SPILL_BLKID ||
range->start_blkid * srdp->datablksz >=
dscp->dsc_resume_offset));
dscp->dsc_resume_offset)));
/* it's a level-0 block of a regular object */

mutex_enter(&srdp->lock);
Expand Down Expand Up @@ -1006,8 +958,6 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
ASSERT(dscp->dsc_dso->dso_dryrun ||
srdp->abuf != NULL || srdp->abd != NULL);

uint64_t offset = range->start_blkid * srdp->datablksz;

char *data = NULL;
if (srdp->abd != NULL) {
data = abd_to_buf(srdp->abd);
Expand All @@ -1016,6 +966,14 @@ do_dump(dmu_send_cookie_t *dscp, struct send_range *range)
data = srdp->abuf->b_data;
}

if (BP_GET_TYPE(bp) == DMU_OT_SA) {
ASSERT3U(range->start_blkid, ==, DMU_SPILL_BLKID);
err = dump_spill(dscp, bp, range->object, data);
return (err);
}

uint64_t offset = range->start_blkid * srdp->datablksz;

/*
* If we have large blocks stored on disk but the send flags
* don't allow us to send large blocks, we split the data from
Expand Down Expand Up @@ -1098,6 +1056,8 @@ range_alloc(enum type type, uint64_t object, uint64_t start_blkid,
range->sru.data.io_outstanding = 0;
range->sru.data.io_err = 0;
range->sru.data.io_compressed = B_FALSE;
} else if (type == OBJECT) {
range->sru.object.spill_range = NULL;
}
return (range);
}
Expand Down Expand Up @@ -1742,6 +1702,45 @@ enqueue_range(struct send_reader_thread_arg *srta, bqueue_t *q, dnode_t *dn,
bqueue_enqueue(q, range, datablksz);
}

/*
* Send DRR_SPILL records for unmodified spill blocks. This is useful
* because changing certain attributes of the object (e.g. blocksize)
* can cause old versions of ZFS to incorrectly remove a spill block.
* Including these records in the stream forces an up to date version
* to always be written ensuring they're never lost. Current versions
* of the code which understand the DRR_FLAG_SPILL_BLOCK feature can
* ignore these unmodified spill blocks.
*
* We piggyback the spill_range to dnode range instead of enqueueing it
* so send_range_after won't complain.
*/
static uint64_t
piggyback_unmodified_spill(struct send_reader_thread_arg *srta,
struct send_range *range)
{
ASSERT3U(range->type, ==, OBJECT);

dnode_phys_t *dnp = range->sru.object.dnp;
uint64_t fromtxg = srta->smta->to_arg->fromtxg;

if (!zfs_send_unmodified_spill_blocks ||
!(dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ||
!(BP_GET_LOGICAL_BIRTH(DN_SPILL_BLKPTR(dnp)) <= fromtxg))
return (0);

blkptr_t *bp = DN_SPILL_BLKPTR(dnp);
struct send_range *spill_range = range_alloc(DATA, range->object,
DMU_SPILL_BLKID, DMU_SPILL_BLKID+1, B_FALSE);
spill_range->sru.data.bp = *bp;
spill_range->sru.data.obj_type = dnp->dn_type;
spill_range->sru.data.datablksz = BP_GET_LSIZE(bp);

issue_data_read(srta, spill_range);
range->sru.object.spill_range = spill_range;

return (BP_GET_LSIZE(bp));
}

/*
* This thread is responsible for two things: First, it retrieves the correct
* blkptr in the to ds if we need to send the data because of something from
Expand Down Expand Up @@ -1773,17 +1772,20 @@ send_reader_thread(void *arg)
uint64_t last_obj_exists = B_TRUE;
while (!range->eos_marker && !srta->cancel && smta->error == 0 &&
err == 0) {
uint64_t spill = 0;
switch (range->type) {
case DATA:
issue_data_read(srta, range);
bqueue_enqueue(outq, range, range->sru.data.datablksz);
range = get_next_range_nofree(inq, range);
break;
case HOLE:
case OBJECT:
spill = piggyback_unmodified_spill(srta, range);
zfs_fallthrough;
case HOLE:
case OBJECT_RANGE:
case REDACT: // Redacted blocks must exist
bqueue_enqueue(outq, range, sizeof (*range));
bqueue_enqueue(outq, range, sizeof (*range) + spill);
amotin marked this conversation as resolved.
Show resolved Hide resolved
range = get_next_range_nofree(inq, range);
break;
case PREVIOUSLY_REDACTED: {
Expand Down