From bc7f63be7157eab6427e1114a840ea83e8d937b7 Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Sun, 22 Dec 2024 11:12:03 -0500 Subject: [PATCH] Allocate zfs_locked_range_t memory externally from zfs_rangelock_{try,}enter() Typically, the memory allocated by kmem_alloc() can be trivially allocated from either the stack or as part of another structure. The only case where it cannot is in vdev_raidz_io_start(), although some further refactoring should be able to eliminate that case too. Allocating from the stack or as part of another data structure is faster as it gives us this memory for free, so there is little reason not to do it. This eliminates a non-negligible amount of CPU time that I have seen in flame graphs going back to the early days of OpenZFS when the tree was the ZFSOnLinux tree. This should make our VFS and zvol operations slightly faster. Some RAID-Z operations will also become slightly faster. Signed-off-by: Richard Yao --- cmd/ztest.c | 40 ++++++++--------- include/sys/dmu.h | 3 +- include/sys/zfs_rlock.h | 4 +- module/os/freebsd/zfs/zfs_vnops_os.c | 21 ++++----- module/os/freebsd/zfs/zfs_znode_os.c | 30 ++++++------- module/os/freebsd/zfs/zvol_os.c | 28 ++++++------ module/os/linux/zfs/zfs_vnops_os.c | 27 ++++++------ module/os/linux/zfs/zfs_znode_os.c | 30 ++++++------- module/os/linux/zfs/zvol_os.c | 20 +++++---- module/zfs/vdev_raidz.c | 46 +++++++++++--------- module/zfs/zfs_rlock.c | 58 +++++++++++-------------- module/zfs/zfs_vnops.c | 65 ++++++++++++++-------------- module/zfs/zvol.c | 6 +-- 13 files changed, 188 insertions(+), 190 deletions(-) diff --git a/cmd/ztest.c b/cmd/ztest.c index 4a7959ebfca5..7030b94cbfd5 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -1745,23 +1745,19 @@ ztest_object_unlock(ztest_ds_t *zd, uint64_t object) ztest_rll_unlock(rll); } -static rl_t * -ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, +static void +ztest_range_lock(ztest_ds_t *zd, rl_t *rl, uint64_t object, uint64_t offset, uint64_t size, 
rl_type_t type) { uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; - rl_t *rl; - rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); rl->rl_object = object; rl->rl_offset = offset; rl->rl_size = size; rl->rl_lock = rll; ztest_rll_lock(rll, type); - - return (rl); } static void @@ -1770,8 +1766,6 @@ ztest_range_unlock(rl_t *rl) rll_t *rll = rl->rl_lock; ztest_rll_unlock(rll); - - umem_free(rl, sizeof (*rl)); } static void @@ -2200,7 +2194,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) dmu_tx_t *tx; dmu_buf_t *db; arc_buf_t *abuf = NULL; - rl_t *rl; + rl_t rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); @@ -2224,7 +2218,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) bt = NULL; ztest_object_lock(zd, lr->lr_foid, ZTRL_READER); - rl = ztest_range_lock(zd, lr->lr_foid, offset, length, ZTRL_WRITER); + ztest_range_lock(zd, &rl, lr->lr_foid, offset, length, ZTRL_WRITER); VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); @@ -2249,7 +2243,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) if (abuf != NULL) dmu_return_arcbuf(abuf); dmu_buf_rele(db, FTAG); - ztest_range_unlock(rl); + ztest_range_unlock(&rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } @@ -2315,7 +2309,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) dmu_tx_commit(tx); - ztest_range_unlock(rl); + ztest_range_unlock(&rl); ztest_object_unlock(zd, lr->lr_foid); return (0); @@ -2329,13 +2323,13 @@ ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; - rl_t *rl; + rl_t rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ztest_object_lock(zd, lr->lr_foid, ZTRL_READER); - rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, + ztest_range_lock(zd, &rl, lr->lr_foid, lr->lr_offset, lr->lr_length, ZTRL_WRITER); tx = dmu_tx_create(os); @@ -2344,7 +2338,7 @@ 
ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { - ztest_range_unlock(rl); + ztest_range_unlock(&rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } @@ -2356,7 +2350,7 @@ ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) dmu_tx_commit(tx); - ztest_range_unlock(rl); + ztest_range_unlock(&rl); ztest_object_unlock(zd, lr->lr_foid); return (0); @@ -2472,12 +2466,12 @@ ztest_get_done(zgd_t *zgd, int error) { (void) error; ztest_ds_t *zd = zgd->zgd_private; - uint64_t object = ((rl_t *)zgd->zgd_lr)->rl_object; + uint64_t object = ((rl_t *)&zgd->zgd_lr)->rl_object; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - ztest_range_unlock((rl_t *)zgd->zgd_lr); + ztest_range_unlock((rl_t *)&zgd->zgd_lr); ztest_object_unlock(zd, object); umem_free(zgd, sizeof (*zgd)); @@ -2527,7 +2521,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, zgd->zgd_private = zd; if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd, + ztest_range_lock(zd, (rl_t *)&zgd->zgd_lr, object, offset, size, ZTRL_READER); error = dmu_read(os, object, offset, size, buf, @@ -2543,7 +2537,7 @@ ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, offset = 0; } - zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd, + ztest_range_lock(zd, (rl_t *)&zgd->zgd_lr, object, offset, size, ZTRL_READER); error = dmu_buf_hold_noread(os, object, offset, zgd, &db); @@ -2790,12 +2784,12 @@ ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; - rl_t *rl; + rl_t rl; txg_wait_synced(dmu_objset_pool(os), 0); ztest_object_lock(zd, object, ZTRL_READER); - rl = ztest_range_lock(zd, object, offset, size, ZTRL_WRITER); + ztest_range_lock(zd, &rl, object, offset, size, ZTRL_WRITER); tx = dmu_tx_create(os); @@ -2811,7 +2805,7 @@ ztest_prealloc(ztest_ds_t *zd, 
uint64_t object, uint64_t offset, uint64_t size) (void) dmu_free_long_range(os, object, offset, size); } - ztest_range_unlock(rl); + ztest_range_unlock(&rl); ztest_object_unlock(zd, object); } diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 29f715039d29..67e199a40c83 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1086,8 +1087,8 @@ typedef struct zgd { struct lwb *zgd_lwb; struct blkptr *zgd_bp; dmu_buf_t *zgd_db; - struct zfs_locked_range *zgd_lr; void *zgd_private; + zfs_locked_range_t zgd_lr; } zgd_t; typedef void dmu_sync_cb_t(zgd_t *arg, int error); diff --git a/include/sys/zfs_rlock.h b/include/sys/zfs_rlock.h index 5e5d6d68d6c5..85c99a4d73b7 100644 --- a/include/sys/zfs_rlock.h +++ b/include/sys/zfs_rlock.h @@ -69,9 +69,9 @@ typedef struct zfs_locked_range { void zfs_rangelock_init(zfs_rangelock_t *, zfs_rangelock_cb_t *, void *); void zfs_rangelock_fini(zfs_rangelock_t *); -zfs_locked_range_t *zfs_rangelock_enter(zfs_rangelock_t *, +void zfs_rangelock_enter(zfs_rangelock_t *, zfs_locked_range_t *, uint64_t, uint64_t, zfs_rangelock_type_t); -zfs_locked_range_t *zfs_rangelock_tryenter(zfs_rangelock_t *, +boolean_t zfs_rangelock_tryenter(zfs_rangelock_t *, zfs_locked_range_t *, uint64_t, uint64_t, zfs_rangelock_type_t); void zfs_rangelock_exit(zfs_locked_range_t *); void zfs_rangelock_reduce(zfs_locked_range_t *, uint64_t, uint64_t); diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c index b8c2c341dace..2c256a06c2cb 100644 --- a/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/module/os/freebsd/zfs/zfs_vnops_os.c @@ -3924,7 +3924,8 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; + boolean_t res; vm_object_t object; off_t start, end, obj_size; uint_t blksz; @@ -3948,9 +3949,9 @@ zfs_getpages(struct vnode 
*vp, vm_page_t *ma, int count, int *rbehind, blksz = zp->z_blksz; len = roundup(end, blksz) - rounddown(start, blksz); - lr = zfs_rangelock_tryenter(&zp->z_rangelock, + res = zfs_rangelock_tryenter(&zp->z_rangelock, &lr, rounddown(start, blksz), len, RL_READER); - if (lr == NULL) { + if (res == B_FALSE) { /* * Avoid a deadlock with update_pages(). We need to * hold the range lock when copying from the DMU, so @@ -3963,7 +3964,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, for (int i = 0; i < count; i++) vm_page_xunbusy(ma[i]); - lr = zfs_rangelock_enter(&zp->z_rangelock, + zfs_rangelock_enter(&zp->z_rangelock, &lr, rounddown(start, blksz), len, RL_READER); zfs_vmobject_wlock(object); @@ -3974,14 +3975,14 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, } if (blksz == zp->z_blksz) break; - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); } zfs_vmobject_wlock(object); obj_size = object->un_pager.vnp.vnp_size; zfs_vmobject_wunlock(object); if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); zfs_exit(zfsvfs, FTAG); return (zfs_vm_pagerret_bad); } @@ -4032,7 +4033,7 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, i += count1 - 1; } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); ZFS_ACCESSTIME_STAMP(zfsvfs, zp); dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE); @@ -4075,7 +4076,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; dmu_tx_t *tx; struct sf_buf *sf; vm_object_t object; @@ -4107,7 +4108,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, blksz = zp->z_blksz; lo_off = rounddown(off, blksz); lo_len = roundup(len + (off - lo_off), blksz); - lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER); + zfs_rangelock_enter(&zp->z_rangelock, 
&lr, lo_off, lo_len, RL_WRITER); zfs_vmobject_wlock(object); if (len + off > object->un_pager.vnp.vnp_size) { @@ -4213,7 +4214,7 @@ zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags, dmu_tx_commit(tx); out: - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); if (commit) zil_commit(zfsvfs->z_log, zp->z_id); diff --git a/module/os/freebsd/zfs/zfs_znode_os.c b/module/os/freebsd/zfs/zfs_znode_os.c index 31ca07a86dda..922521871ae6 100644 --- a/module/os/freebsd/zfs/zfs_znode_os.c +++ b/module/os/freebsd/zfs/zfs_znode_os.c @@ -1371,20 +1371,20 @@ zfs_extend(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; dmu_tx_t *tx; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; uint64_t newblksz; int error; /* * We will change zp_size, lock the whole file. */ - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + zfs_rangelock_enter(&zp->z_rangelock, &lr, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end <= zp->z_size) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (0); } tx = dmu_tx_create(zfsvfs->z_os); @@ -1414,7 +1414,7 @@ zfs_extend(znode_t *zp, uint64_t end) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (error); } @@ -1428,7 +1428,7 @@ zfs_extend(znode_t *zp, uint64_t end) vnode_pager_setsize(ZTOV(zp), end); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); dmu_tx_commit(tx); @@ -1448,19 +1448,19 @@ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; int error; /* * Lock the range being freed. */ - lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); + zfs_rangelock_enter(&zp->z_rangelock, &lr, off, len, RL_WRITER); /* * Nothing to do if file already at desired length. 
*/ if (off >= zp->z_size) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (0); } @@ -1482,7 +1482,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) #endif } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (error); } @@ -1501,7 +1501,7 @@ zfs_trunc(znode_t *zp, uint64_t end) zfsvfs_t *zfsvfs = zp->z_zfsvfs; vnode_t *vp = ZTOV(zp); dmu_tx_t *tx; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; int error; sa_bulk_attr_t bulk[2]; int count = 0; @@ -1509,20 +1509,20 @@ zfs_trunc(znode_t *zp, uint64_t end) /* * We will change zp_size, lock the whole file. */ - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + zfs_rangelock_enter(&zp->z_rangelock, &lr, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end >= zp->z_size) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (0); } error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, DMU_OBJECT_END); if (error) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (error); } tx = dmu_tx_create(zfsvfs->z_os); @@ -1532,7 +1532,7 @@ zfs_trunc(znode_t *zp, uint64_t end) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (error); } @@ -1557,7 +1557,7 @@ zfs_trunc(znode_t *zp, uint64_t end) */ vnode_pager_setsize(vp, end); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (0); } diff --git a/module/os/freebsd/zfs/zvol_os.c b/module/os/freebsd/zfs/zvol_os.c index c3be4730d4b6..4d4d227e51a6 100644 --- a/module/os/freebsd/zfs/zvol_os.c +++ b/module/os/freebsd/zfs/zvol_os.c @@ -666,7 +666,7 @@ zvol_geom_bio_strategy(struct bio *bp) size_t resid; char *addr; objset_t *os; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; int error = 0; boolean_t doread = B_FALSE; boolean_t is_dumpified; @@ -731,7 +731,7 @@ zvol_geom_bio_strategy(struct bio *bp) * There must be no buffer changes when doing a dmu_sync() because * we can't change the 
data whilst calculating the checksum. */ - lr = zfs_rangelock_enter(&zv->zv_rangelock, off, resid, + zfs_rangelock_enter(&zv->zv_rangelock, &lr, off, resid, doread ? RL_READER : RL_WRITER); if (bp->bio_cmd == BIO_DELETE) { @@ -776,7 +776,7 @@ zvol_geom_bio_strategy(struct bio *bp) resid -= size; } unlock: - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); bp->bio_completed = bp->bio_length - resid; if (bp->bio_completed < bp->bio_length && off > volsize) @@ -821,7 +821,7 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag) { zvol_state_t *zv; uint64_t volsize; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; int error = 0; zfs_uio_t uio; @@ -840,7 +840,7 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag) rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); ssize_t start_resid = zfs_uio_resid(&uio); - lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio), + zfs_rangelock_enter(&zv->zv_rangelock, &lr, zfs_uio_offset(&uio), zfs_uio_resid(&uio), RL_READER); while (zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) { uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1); @@ -857,7 +857,7 @@ zvol_cdev_read(struct cdev *dev, struct uio *uio_s, int ioflag) break; } } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); int64_t nread = start_resid - zfs_uio_resid(&uio); dataset_kstats_update_read_kstats(&zv->zv_kstat, nread); rw_exit(&zv->zv_suspend_lock); @@ -870,7 +870,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) { zvol_state_t *zv; uint64_t volsize; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; int error = 0; boolean_t commit; zfs_uio_t uio; @@ -892,7 +892,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); zvol_ensure_zilog(zv); - lr = zfs_rangelock_enter(&zv->zv_rangelock, zfs_uio_offset(&uio), + zfs_rangelock_enter(&zv->zv_rangelock, &lr, zfs_uio_offset(&uio), zfs_uio_resid(&uio), RL_WRITER); while 
(zfs_uio_resid(&uio) > 0 && zfs_uio_offset(&uio) < volsize) { uint64_t bytes = MIN(zfs_uio_resid(&uio), DMU_MAX_ACCESS >> 1); @@ -916,7 +916,7 @@ zvol_cdev_write(struct cdev *dev, struct uio *uio_s, int ioflag) if (error) break; } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); int64_t nwritten = start_resid - zfs_uio_resid(&uio); dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); if (commit) @@ -1109,7 +1109,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, int fflag, struct thread *td) { zvol_state_t *zv; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; off_t offset, length; int error; boolean_t sync; @@ -1149,7 +1149,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, } rw_enter(&zv->zv_suspend_lock, ZVOL_RW_READER); zvol_ensure_zilog(zv); - lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, length, + zfs_rangelock_enter(&zv->zv_rangelock, &lr, offset, length, RL_WRITER); dmu_tx_t *tx = dmu_tx_create(zv->zv_objset); error = dmu_tx_assign(tx, TXG_WAIT); @@ -1163,7 +1163,7 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length); } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); if (sync) zil_commit(zv->zv_zilog, ZVOL_OBJ); rw_exit(&zv->zv_suspend_lock); @@ -1209,10 +1209,10 @@ zvol_cdev_ioctl(struct cdev *dev, ulong_t cmd, caddr_t data, hole = (cmd == FIOSEEKHOLE); noff = *off; - lr = zfs_rangelock_enter(&zv->zv_rangelock, 0, UINT64_MAX, + zfs_rangelock_enter(&zv->zv_rangelock, &lr, 0, UINT64_MAX, RL_READER); error = dmu_offset_next(zv->zv_objset, ZVOL_OBJ, hole, &noff); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); *off = noff; break; } diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index a882c88a7a72..dc114347d676 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -3790,14 +3790,14 @@ zfs_putpage(struct inode *ip, struct page *pp, struct 
writeback_control *wbc, redirty_page_for_writepage(wbc, pp); unlock_page(pp); - zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, - pgoff, pglen, RL_WRITER); + zfs_locked_range_t lr; + zfs_rangelock_enter(&zp->z_rangelock, &lr, pgoff, pglen, RL_WRITER); lock_page(pp); /* Page mapping changed or it was no longer dirty, we're done */ if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) { unlock_page(pp); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); zfs_exit(zfsvfs, FTAG); return (0); } @@ -3805,7 +3805,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, /* Another process started write block if required */ if (PageWriteback(pp)) { unlock_page(pp); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); if (wbc->sync_mode != WB_SYNC_NONE) { /* @@ -3832,7 +3832,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, /* Clear the dirty flag the required locks are held */ if (!clear_page_dirty_for_io(pp)) { unlock_page(pp); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); zfs_exit(zfsvfs, FTAG); return (0); } @@ -3864,7 +3864,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, end_page_writeback(pp); if (!for_sync) atomic_dec_32(&zp->z_async_writes_cnt); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); zfs_exit(zfsvfs, FTAG); return (err); } @@ -3915,7 +3915,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc, dmu_tx_commit(tx); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); if (commit) zil_commit(zfsvfs->z_log, zp->z_id); @@ -4127,9 +4127,10 @@ zfs_getpage(struct inode *ip, struct page *pp) * dmu_read_impl() for db->db_data during the mempcy operation when * zfs_fillpage() calls dmu_read(). 
*/ - zfs_locked_range_t *lr = zfs_rangelock_tryenter(&zp->z_rangelock, - io_off, io_len, RL_READER); - if (lr == NULL) { + zfs_locked_range_t lr; + boolean_t res = zfs_rangelock_tryenter(&zp->z_rangelock, &lr, io_off, + io_len, RL_READER); + if (res == B_FALSE) { /* * It is important to drop the page lock before grabbing the * rangelock to avoid another deadlock between here and @@ -4138,13 +4139,13 @@ zfs_getpage(struct inode *ip, struct page *pp) */ get_page(pp); unlock_page(pp); - lr = zfs_rangelock_enter(&zp->z_rangelock, io_off, - io_len, RL_READER); + zfs_rangelock_enter(&zp->z_rangelock, &lr, io_off, io_len, + RL_READER); lock_page(pp); put_page(pp); } error = zfs_fillpage(ip, pp); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); if (error == 0) dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE); diff --git a/module/os/linux/zfs/zfs_znode_os.c b/module/os/linux/zfs/zfs_znode_os.c index aff7b1f4dac1..a556ef8d253e 100644 --- a/module/os/linux/zfs/zfs_znode_os.c +++ b/module/os/linux/zfs/zfs_znode_os.c @@ -1477,20 +1477,20 @@ zfs_extend(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = ZTOZSB(zp); dmu_tx_t *tx; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; uint64_t newblksz; int error; /* * We will change zp_size, lock the whole file. */ - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + zfs_rangelock_enter(&zp->z_rangelock, &lr, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. 
*/ if (end <= zp->z_size) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (0); } tx = dmu_tx_create(zfsvfs->z_os); @@ -1520,7 +1520,7 @@ zfs_extend(znode_t *zp, uint64_t end) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (error); } @@ -1532,7 +1532,7 @@ zfs_extend(znode_t *zp, uint64_t end) VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), &zp->z_size, sizeof (zp->z_size), tx)); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); dmu_tx_commit(tx); @@ -1595,19 +1595,19 @@ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) { zfsvfs_t *zfsvfs = ZTOZSB(zp); - zfs_locked_range_t *lr; + zfs_locked_range_t lr; int error; /* * Lock the range being freed. */ - lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); + zfs_rangelock_enter(&zp->z_rangelock, &lr, off, len, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (off >= zp->z_size) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (0); } @@ -1657,7 +1657,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) page_len); } } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (error); } @@ -1675,7 +1675,7 @@ zfs_trunc(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = ZTOZSB(zp); dmu_tx_t *tx; - zfs_locked_range_t *lr; + zfs_locked_range_t lr; int error; sa_bulk_attr_t bulk[2]; int count = 0; @@ -1683,20 +1683,20 @@ zfs_trunc(znode_t *zp, uint64_t end) /* * We will change zp_size, lock the whole file. */ - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); + zfs_rangelock_enter(&zp->z_rangelock, &lr, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. 
*/ if (end >= zp->z_size) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (0); } error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, DMU_OBJECT_END); if (error) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (error); } tx = dmu_tx_create(zfsvfs->z_os); @@ -1706,7 +1706,7 @@ zfs_trunc(znode_t *zp, uint64_t end) error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (error); } @@ -1722,7 +1722,7 @@ zfs_trunc(znode_t *zp, uint64_t end) VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); dmu_tx_commit(tx); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); return (0); } diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index 7c9aae6a66af..521f4e2da7ef 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -275,8 +275,9 @@ zvol_write(zv_request_t *zvr) boolean_t sync = io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; - zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, - uio.uio_loffset, uio.uio_resid, RL_WRITER); + zfs_locked_range_t lr; + zfs_rangelock_enter(&zv->zv_rangelock, &lr, uio.uio_loffset, + uio.uio_resid, RL_WRITER); uint64_t volsize = zv->zv_volsize; while (uio.uio_resid > 0 && uio.uio_loffset < volsize) { @@ -304,7 +305,7 @@ zvol_write(zv_request_t *zvr) if (error) break; } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); int64_t nwritten = start_resid - uio.uio_resid; dataset_kstats_update_write_kstats(&zv->zv_kstat, nwritten); @@ -381,8 +382,8 @@ zvol_discard(zv_request_t *zvr) if (start >= end) goto unlock; - zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, - start, size, RL_WRITER); + zfs_locked_range_t lr; + zfs_rangelock_enter(&zv->zv_rangelock, &lr, start, size, RL_WRITER); tx = dmu_tx_create(zv->zv_objset); dmu_tx_mark_netfree(tx); @@ -395,7 +396,7 @@ zvol_discard(zv_request_t *zvr) error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, 
start, size); } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); if (error == 0 && sync) zil_commit(zv->zv_zilog, ZVOL_OBJ); @@ -453,8 +454,9 @@ zvol_read(zv_request_t *zvr) bio); } - zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, - uio.uio_loffset, uio.uio_resid, RL_READER); + zfs_locked_range_t lr; + zfs_rangelock_enter(&zv->zv_rangelock, &lr, uio.uio_loffset, + uio.uio_resid, RL_READER); uint64_t volsize = zv->zv_volsize; @@ -473,7 +475,7 @@ zvol_read(zv_request_t *zvr) break; } } - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); int64_t nread = start_resid - uio.uio_resid; dataset_kstats_update_read_kstats(&zv->zv_kstat, nread); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 6103f780e6bc..83cac5ec21a7 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -2534,7 +2534,8 @@ vdev_raidz_io_start(zio_t *zio) uint64_t logical_width = vdev_raidz_get_logical_width(vdrz, BP_GET_BIRTH(zio->io_bp)); if (logical_width != vdrz->vd_physical_width) { - zfs_locked_range_t *lr = NULL; + zfs_locked_range_t *lr = kmem_alloc( + sizeof (zfs_locked_range_t), KM_SLEEP); uint64_t synced_offset = UINT64_MAX; uint64_t next_offset = UINT64_MAX; boolean_t use_scratch = B_FALSE; @@ -2553,7 +2554,7 @@ vdev_raidz_io_start(zio_t *zio) if (vdrz->vn_vre.vre_state == DSS_SCANNING) { ASSERT3P(vd->vdev_spa->spa_raidz_expand, ==, &vdrz->vn_vre); - lr = zfs_rangelock_enter(&vdrz->vn_vre.vre_rangelock, + zfs_rangelock_enter(&vdrz->vn_vre.vre_rangelock, lr, zio->io_offset, zio->io_size, RL_READER); use_scratch = (RRSS_GET_STATE(&vd->vdev_spa->spa_ubsync) == @@ -3578,6 +3579,7 @@ vdev_raidz_io_done(zio_t *zio) done: if (rm->rm_lr != NULL) { zfs_rangelock_exit(rm->rm_lr); + kmem_free(rm->rm_lr, sizeof (zfs_locked_range_t)); rm->rm_lr = NULL; } } @@ -3723,9 +3725,9 @@ raidz_reflow_sync(void *arg, dmu_tx_t *tx) VERIFY3U(vre->vre_failed_offset, >=, old_offset); mutex_exit(&vre->vre_lock); - zfs_locked_range_t *lr = 
zfs_rangelock_enter(&vre->vre_rangelock, - old_offset, new_offset - old_offset, - RL_WRITER); + zfs_locked_range_t lr; + zfs_rangelock_enter(&vre->vre_rangelock, &lr, old_offset, + new_offset - old_offset, RL_WRITER); /* * Update the uberblock that will be written when this txg completes. @@ -3733,7 +3735,7 @@ raidz_reflow_sync(void *arg, dmu_tx_t *tx) RAIDZ_REFLOW_SET(&spa->spa_uberblock, RRSS_SCRATCH_INVALID_SYNCED_REFLOW, new_offset); vre->vre_offset_pertxg[txgoff] = 0; - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); mutex_enter(&vre->vre_lock); vre->vre_bytes_copied += vre->vre_bytes_copied_pertxg[txgoff]; @@ -3830,8 +3832,8 @@ raidz_reflow_complete_sync(void *arg, dmu_tx_t *tx) * State of one copy batch. */ typedef struct raidz_reflow_arg { + zfs_locked_range_t rra_lr; /* Range lock of this batch. */ vdev_raidz_expand_t *rra_vre; /* Global expantion state. */ - zfs_locked_range_t *rra_lr; /* Range lock of this batch. */ uint64_t rra_txg; /* TXG of this batch. */ uint_t rra_ashift; /* Ashift of the vdev. */ uint32_t rra_tbd; /* Number of in-flight ZIOs. 
*/ @@ -3855,11 +3857,11 @@ raidz_reflow_write_done(zio_t *zio) if (zio->io_error != 0) { /* Force a reflow pause on errors */ vre->vre_failed_offset = - MIN(vre->vre_failed_offset, rra->rra_lr->lr_offset); + MIN(vre->vre_failed_offset, rra->rra_lr.lr_offset); } ASSERT3U(vre->vre_outstanding_bytes, >=, zio->io_size); vre->vre_outstanding_bytes -= zio->io_size; - if (rra->rra_lr->lr_offset + rra->rra_lr->lr_length < + if (rra->rra_lr.lr_offset + rra->rra_lr.lr_length < vre->vre_failed_offset) { vre->vre_bytes_copied_pertxg[rra->rra_txg & TXG_MASK] += zio->io_size; @@ -3870,8 +3872,9 @@ raidz_reflow_write_done(zio_t *zio) if (!done) return; + spa_config_exit(zio->io_spa, SCL_STATE, zio->io_spa); - zfs_rangelock_exit(rra->rra_lr); + zfs_rangelock_exit(&rra->rra_lr); kmem_free(rra, sizeof (*rra) + sizeof (zio_t *) * rra->rra_writes); } @@ -3899,8 +3902,8 @@ raidz_reflow_read_done(zio_t *zio) if (zio->io_error != 0 || !vdev_dtl_empty(zio->io_vd, DTL_MISSING)) { zfs_dbgmsg("reflow read failed off=%llu size=%llu txg=%llu " "err=%u partial_dtl_empty=%u missing_dtl_empty=%u", - (long long)rra->rra_lr->lr_offset, - (long long)rra->rra_lr->lr_length, + (long long)rra->rra_lr.lr_offset, + (long long)rra->rra_lr.lr_length, (long long)rra->rra_txg, zio->io_error, vdev_dtl_empty(zio->io_vd, DTL_PARTIAL), @@ -3908,7 +3911,7 @@ raidz_reflow_read_done(zio_t *zio) mutex_enter(&vre->vre_lock); /* Force a reflow pause on errors */ vre->vre_failed_offset = - MIN(vre->vre_failed_offset, rra->rra_lr->lr_offset); + MIN(vre->vre_failed_offset, rra->rra_lr.lr_offset); mutex_exit(&vre->vre_lock); } @@ -4008,7 +4011,7 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, raidz_reflow_arg_t *rra = kmem_zalloc(sizeof (*rra) + sizeof (zio_t *) * writes, KM_SLEEP); rra->rra_vre = vre; - rra->rra_lr = zfs_rangelock_enter(&vre->vre_rangelock, + zfs_rangelock_enter(&vre->vre_rangelock, &rra->rra_lr, offset, size, RL_WRITER); rra->rra_txg = dmu_tx_get_txg(tx); rra->rra_ashift = 
ashift; @@ -4027,18 +4030,18 @@ raidz_reflow_impl(vdev_t *vd, vdev_raidz_expand_t *vre, range_tree_t *rt, if (vdev_raidz_expand_child_replacing(vd)) { zfs_dbgmsg("replacing vdev encountered, reflow paused at " "offset=%llu txg=%llu", - (long long)rra->rra_lr->lr_offset, + (long long)rra->rra_lr.lr_offset, (long long)rra->rra_txg); mutex_enter(&vre->vre_lock); vre->vre_failed_offset = - MIN(vre->vre_failed_offset, rra->rra_lr->lr_offset); + MIN(vre->vre_failed_offset, rra->rra_lr.lr_offset); cv_signal(&vre->vre_cv); mutex_exit(&vre->vre_lock); /* drop everything we acquired */ spa_config_exit(spa, SCL_STATE, spa); - zfs_rangelock_exit(rra->rra_lr); + zfs_rangelock_exit(&rra->rra_lr); kmem_free(rra, sizeof (*rra) + sizeof (zio_t *) * writes); return (B_TRUE); } @@ -4152,8 +4155,9 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) VERIFY3U(write_size, <=, VDEV_BOOT_SIZE); VERIFY3U(write_size, <=, read_size); - zfs_locked_range_t *lr = zfs_rangelock_enter(&vre->vre_rangelock, - 0, logical_size, RL_WRITER); + zfs_locked_range_t lr; + zfs_rangelock_enter(&vre->vre_rangelock, &lr, 0, logical_size, + RL_WRITER); abd_t **abds = kmem_alloc(raidvd->vdev_children * sizeof (abd_t *), KM_SLEEP); @@ -4215,7 +4219,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) for (int i = 0; i < raidvd->vdev_children; i++) abd_free(abds[i]); kmem_free(abds, raidvd->vdev_children * sizeof (abd_t *)); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); spa_config_exit(spa, SCL_STATE, FTAG); return; } @@ -4367,7 +4371,7 @@ raidz_reflow_scratch_sync(void *arg, dmu_tx_t *tx) * Update progress. 
*/ vre->vre_offset = logical_size; - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); spa_config_exit(spa, SCL_STATE, FTAG); int txgoff = dmu_tx_get_txg(tx) & TXG_MASK; diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c index f42661df82e4..3c1a85dad1c5 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -478,25 +478,23 @@ zfs_rangelock_enter_reader(zfs_rangelock_t *rl, zfs_locked_range_t *new, * entire file is locked as RL_WRITER), or NULL if nonblock is true and the * lock could not be acquired immediately. */ -static zfs_locked_range_t * -zfs_rangelock_enter_impl(zfs_rangelock_t *rl, uint64_t off, uint64_t len, - zfs_rangelock_type_t type, boolean_t nonblock) +static boolean_t +zfs_rangelock_enter_impl(zfs_rangelock_t *rl, zfs_locked_range_t *lr, + uint64_t off, uint64_t len, zfs_rangelock_type_t type, boolean_t nonblock) { - zfs_locked_range_t *new; - + boolean_t success = B_TRUE; ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND); - new = kmem_alloc(sizeof (zfs_locked_range_t), KM_SLEEP); - new->lr_rangelock = rl; - new->lr_offset = off; + lr->lr_rangelock = rl; + lr->lr_offset = off; if (len + off < off) /* overflow */ len = UINT64_MAX - off; - new->lr_length = len; - new->lr_count = 1; /* assume it's going to be in the tree */ - new->lr_type = type; - new->lr_proxy = B_FALSE; - new->lr_write_wanted = B_FALSE; - new->lr_read_wanted = B_FALSE; + lr->lr_length = len; + lr->lr_count = 1; /* assume it's going to be in the tree */ + lr->lr_type = type; + lr->lr_proxy = B_FALSE; + lr->lr_write_wanted = B_FALSE; + lr->lr_read_wanted = B_FALSE; mutex_enter(&rl->rl_lock); if (type == RL_READER) { @@ -504,31 +502,29 @@ zfs_rangelock_enter_impl(zfs_rangelock_t *rl, uint64_t off, uint64_t len, * First check for the usual case of no locks */ if (avl_numnodes(&rl->rl_tree) == 0) { - avl_add(&rl->rl_tree, new); - } else if (!zfs_rangelock_enter_reader(rl, new, nonblock)) { - kmem_free(new, sizeof (*new)); - new = NULL; + 
avl_add(&rl->rl_tree, lr); + } else if (!zfs_rangelock_enter_reader(rl, lr, nonblock)) { + success = B_FALSE; } - } else if (!zfs_rangelock_enter_writer(rl, new, nonblock)) { - kmem_free(new, sizeof (*new)); - new = NULL; + } else if (!zfs_rangelock_enter_writer(rl, lr, nonblock)) { + success = B_FALSE; } mutex_exit(&rl->rl_lock); - return (new); + return (success); } -zfs_locked_range_t * -zfs_rangelock_enter(zfs_rangelock_t *rl, uint64_t off, uint64_t len, - zfs_rangelock_type_t type) +void +zfs_rangelock_enter(zfs_rangelock_t *rl, zfs_locked_range_t *lr, uint64_t off, + uint64_t len, zfs_rangelock_type_t type) { - return (zfs_rangelock_enter_impl(rl, off, len, type, B_FALSE)); + (void) zfs_rangelock_enter_impl(rl, lr, off, len, type, B_FALSE); } -zfs_locked_range_t * -zfs_rangelock_tryenter(zfs_rangelock_t *rl, uint64_t off, uint64_t len, - zfs_rangelock_type_t type) +boolean_t +zfs_rangelock_tryenter(zfs_rangelock_t *rl, zfs_locked_range_t *lr, + uint64_t off, uint64_t len, zfs_rangelock_type_t type) { - return (zfs_rangelock_enter_impl(rl, off, len, type, B_TRUE)); + return (zfs_rangelock_enter_impl(rl, lr, off, len, type, B_TRUE)); } /* @@ -542,8 +538,6 @@ zfs_rangelock_free(zfs_locked_range_t *lr) if (lr->lr_read_wanted) cv_destroy(&lr->lr_read_cv); - - kmem_free(lr, sizeof (zfs_locked_range_t)); } /* diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index c01a9cf5d0b2..a44e6d78695d 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -119,7 +119,7 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr) static int zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) { - zfs_locked_range_t *lr; + zfs_locked_range_t lr; uint64_t noff = (uint64_t)*off; /* new offset */ uint64_t file_sz; int error; @@ -139,9 +139,9 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) if (zn_has_cached_data(zp, 0, file_sz - 1)) zn_flush_cached_data(zp, B_TRUE); - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER); + 
zfs_rangelock_enter(&zp->z_rangelock, &lr, 0, UINT64_MAX, RL_READER); error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); if (error == ESRCH) return (SET_ERROR(ENXIO)); @@ -361,7 +361,8 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) /* * Lock the range against changes. */ - zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock, + zfs_locked_range_t lr; + zfs_rangelock_enter(&zp->z_rangelock, &lr, zfs_uio_offset(uio), zfs_uio_resid(uio), RL_READER); /* @@ -496,7 +497,7 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread); out: - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); if (dio_checksum_failure == B_TRUE) uio->uio_extflg |= UIO_DIRECT; @@ -666,15 +667,15 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) /* * If in append mode, set the io offset pointer to eof. */ - zfs_locked_range_t *lr; + zfs_locked_range_t lr; if (ioflag & O_APPEND) { /* * Obtain an appending range lock to guarantee file append * semantics. We reset the write offset once we have the lock. */ - lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND); - woff = lr->lr_offset; - if (lr->lr_length == UINT64_MAX) { + zfs_rangelock_enter(&zp->z_rangelock, &lr, 0, n, RL_APPEND); + woff = lr.lr_offset; + if (lr.lr_length == UINT64_MAX) { /* * We overlocked the file because this write will cause * the file block size to increase. @@ -695,11 +696,11 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) * this write, then this range lock will lock the entire file * so that we can re-write the block safely. 
*/ - lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER); + zfs_rangelock_enter(&zp->z_rangelock, &lr, woff, n, RL_WRITER); } if (zn_rlimit_fsize_uio(zp, uio)) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); zfs_exit(zfsvfs, FTAG); return (SET_ERROR(EFBIG)); } @@ -707,7 +708,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) const rlim64_t limit = MAXOFFSET_T; if (woff >= limit) { - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); zfs_exit(zfsvfs, FTAG); return (SET_ERROR(EFBIG)); } @@ -746,7 +747,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) * through the ARC; however, the following 3 1K requests will * use Direct I/O. */ - if (uio->uio_extflg & UIO_DIRECT && lr->lr_length == UINT64_MAX) { + if (uio->uio_extflg & UIO_DIRECT && lr.lr_length == UINT64_MAX) { uio->uio_extflg &= ~UIO_DIRECT; o_direct_defer = B_TRUE; } @@ -769,7 +770,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) } uint64_t blksz; - if (lr->lr_length == UINT64_MAX && zp->z_size <= zp->z_blksz) { + if (lr.lr_length == UINT64_MAX && zp->z_size <= zp->z_blksz) { if (zp->z_blksz > zfsvfs->z_max_blksz && !ISP2(zp->z_blksz)) { /* @@ -852,9 +853,9 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) * on the first iteration since rangelock_reduce() will * shrink down lr_length to the appropriate size. */ - if (lr->lr_length == UINT64_MAX) { + if (lr.lr_length == UINT64_MAX) { zfs_grow_blocksize(zp, blksz, tx); - zfs_rangelock_reduce(lr, woff, n); + zfs_rangelock_reduce(&lr, woff, n); } ssize_t tx_bytes; @@ -1019,7 +1020,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) } zfs_znode_update_vfs(zp); - zfs_rangelock_exit(lr); + zfs_rangelock_exit(&lr); /* * Cleanup for Direct I/O if requested. @@ -1145,8 +1146,8 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, * we don't have to write the data twice. 
*/ if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, offset, - size, RL_READER); + zfs_rangelock_enter(&zp->z_rangelock, &zgd->zgd_lr, + offset, size, RL_READER); /* test for truncation needs to be done while range locked */ if (offset >= zp->z_size) { error = SET_ERROR(ENOENT); @@ -1168,12 +1169,12 @@ zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf, size = zp->z_blksz; blkoff = ISP2(size) ? P2PHASE(offset, size) : offset; offset -= blkoff; - zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock, + zfs_rangelock_enter(&zp->z_rangelock, &zgd->zgd_lr, offset, size, RL_READER); if (zp->z_blksz == size) break; offset += blkoff; - zfs_rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(&zgd->zgd_lr); } /* test for truncation needs to be done while range locked */ if (lr->lr_offset >= zp->z_size) @@ -1265,7 +1266,7 @@ zfs_get_done(zgd_t *zgd, int error) if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - zfs_rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(&zgd->zgd_lr); /* * Release the vnode asynchronously as we currently have the @@ -1332,7 +1333,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, { zfsvfs_t *inzfsvfs, *outzfsvfs; objset_t *inos, *outos; - zfs_locked_range_t *inlr, *outlr; + zfs_locked_range_t inlr, outlr; dmu_buf_impl_t *db; dmu_tx_t *tx; zilog_t *zilog; @@ -1475,14 +1476,14 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, * Maintain predictable lock order. 
*/ if (inzp < outzp || (inzp == outzp && inoff < outoff)) { - inlr = zfs_rangelock_enter(&inzp->z_rangelock, inoff, len, + zfs_rangelock_enter(&inzp->z_rangelock, &inlr, inoff, len, RL_READER); - outlr = zfs_rangelock_enter(&outzp->z_rangelock, outoff, len, + zfs_rangelock_enter(&outzp->z_rangelock, &outlr, outoff, len, RL_WRITER); } else { - outlr = zfs_rangelock_enter(&outzp->z_rangelock, outoff, len, + zfs_rangelock_enter(&outzp->z_rangelock, &outlr, outoff, len, RL_WRITER); - inlr = zfs_rangelock_enter(&inzp->z_rangelock, inoff, len, + zfs_rangelock_enter(&inzp->z_rangelock, &inlr, inoff, len, RL_READER); } @@ -1500,7 +1501,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, goto unlock; } if (inblksz != outzp->z_blksz && (outzp->z_size > outzp->z_blksz || - outlr->lr_length != UINT64_MAX)) { + outlr.lr_length != UINT64_MAX)) { error = SET_ERROR(EINVAL); goto unlock; } @@ -1632,7 +1633,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, * on the first iteration since zfs_rangelock_reduce() will * shrink down lr_length to the appropriate size. */ - if (outlr->lr_length == UINT64_MAX) { + if (outlr.lr_length == UINT64_MAX) { zfs_grow_blocksize(outzp, inblksz, tx); /* @@ -1649,7 +1650,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, * Round range lock up to the block boundary, so we * prevent appends until we are done. 
*/ - zfs_rangelock_reduce(outlr, outoff, + zfs_rangelock_reduce(&outlr, outoff, ((len - 1) / inblksz + 1) * inblksz); } @@ -1703,8 +1704,8 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp, zfs_znode_update_vfs(outzp); unlock: - zfs_rangelock_exit(outlr); - zfs_rangelock_exit(inlr); + zfs_rangelock_exit(&outlr); + zfs_rangelock_exit(&inlr); if (done > 0) { /* diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index fec595b2c4c5..06d58f361062 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -655,7 +655,7 @@ zvol_get_done(zgd_t *zgd, int error) if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); - zfs_rangelock_exit(zgd->zgd_lr); + zfs_rangelock_exit(&zgd->zgd_lr); kmem_free(zgd, sizeof (zgd_t)); } @@ -688,7 +688,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, * we don't have to write the data twice. */ if (buf != NULL) { /* immediate write */ - zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, + zfs_rangelock_enter(&zv->zv_rangelock, &zgd->zgd_lr, offset, size, RL_READER); error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf, DMU_READ_NO_PREFETCH); @@ -702,7 +702,7 @@ zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, */ size = zv->zv_volblocksize; offset = P2ALIGN_TYPED(offset, size, uint64_t); - zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset, + zfs_rangelock_enter(&zv->zv_rangelock, &zgd->zgd_lr, offset, size, RL_READER); error = dmu_buf_hold_noread_by_dnode(zv->zv_dn, offset, zgd, &db);