diff --git a/config/kernel-vfs-direct_IO.m4 b/config/kernel-vfs-direct_IO.m4 deleted file mode 100644 index 17605a13fdef..000000000000 --- a/config/kernel-vfs-direct_IO.m4 +++ /dev/null @@ -1,57 +0,0 @@ -dnl # -dnl # Check for Direct I/O interfaces. -dnl # -AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO], [ - ZFS_LINUX_TEST_SRC([direct_io_iter], [ - #include - - static ssize_t test_direct_IO(struct kiocb *kiocb, - struct iov_iter *iter) { return 0; } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .direct_IO = test_direct_IO, - }; - ],[]) - - ZFS_LINUX_TEST_SRC([direct_io_iter_offset], [ - #include - - static ssize_t test_direct_IO(struct kiocb *kiocb, - struct iov_iter *iter, loff_t offset) { return 0; } - - static const struct address_space_operations - aops __attribute__ ((unused)) = { - .direct_IO = test_direct_IO, - }; - ],[]) -]) - -AC_DEFUN([ZFS_AC_KERNEL_VFS_DIRECT_IO], [ - dnl # - dnl # Linux 4.6.x API change - dnl # - AC_MSG_CHECKING([whether aops->direct_IO() uses iov_iter]) - ZFS_LINUX_TEST_RESULT([direct_io_iter], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER, 1, - [aops->direct_IO() uses iov_iter without rw]) - ],[ - AC_MSG_RESULT([no]) - - dnl # - dnl # Linux 4.1.x API change - dnl # - AC_MSG_CHECKING( - [whether aops->direct_IO() uses offset]) - ZFS_LINUX_TEST_RESULT([direct_io_iter_offset], [ - AC_MSG_RESULT([yes]) - AC_DEFINE(HAVE_VFS_DIRECT_IO_ITER_OFFSET, 1, - [aops->direct_IO() uses iov_iter with offset]) - - ],[ - AC_MSG_RESULT([no]) - ZFS_LINUX_TEST_ERROR([Direct I/O]) - ]) - ]) -]) diff --git a/config/kernel-vfs-iov_iter.m4 b/config/kernel-vfs-iov_iter.m4 index ed7961a9e9dd..6c0e46460835 100644 --- a/config/kernel-vfs-iov_iter.m4 +++ b/config/kernel-vfs-iov_iter.m4 @@ -59,7 +59,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [ ]) AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [ - enable_vfs_iov_iter="yes" AC_MSG_CHECKING([whether fault_in_iov_iter_readable() is available]) 
ZFS_LINUX_TEST_RESULT([fault_in_iov_iter_readable], [ @@ -78,17 +77,8 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [ AC_MSG_RESULT(yes) AC_DEFINE(HAVE_IOV_ITER_GET_PAGES2, 1, [iov_iter_get_pages2() is available]) - ], [ + ],[ AC_MSG_RESULT(no) - AC_MSG_CHECKING([whether iov_iter_get_pages() is available]) - ZFS_LINUX_TEST_RESULT([iov_iter_get_pages], [ - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_IOV_ITER_GET_PAGES, 1, - [iov_iter_get_pages() is available]) - ], [ - AC_MSG_RESULT(no) - enable_vfs_iov_iter="no" - ]) ]) dnl # @@ -105,17 +95,6 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [ AC_MSG_RESULT(no) ]) - dnl # - dnl # As of the 4.9 kernel support is provided for iovecs, kvecs, - dnl # bvecs and pipes in the iov_iter structure. As long as the - dnl # other support interfaces are all available the iov_iter can - dnl # be correctly used in the uio structure. - dnl # - AS_IF([test "x$enable_vfs_iov_iter" = "xyes"], [ - AC_DEFINE(HAVE_VFS_IOV_ITER, 1, - [All required iov_iter interfaces are available]) - ]) - dnl # dnl # Kernel 6.5 introduces the iter_iov() function that returns the dnl # __iov member of an iov_iter*. 
The iov member was renamed to this diff --git a/config/kernel.m4 b/config/kernel.m4 index 78f178ff27ac..49ec6266e87a 100644 --- a/config/kernel.m4 +++ b/config/kernel.m4 @@ -79,7 +79,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_VFS_READ_FOLIO ZFS_AC_KERNEL_SRC_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_SRC_VFS_FSYNC_2ARGS - ZFS_AC_KERNEL_SRC_VFS_DIRECT_IO ZFS_AC_KERNEL_SRC_VFS_READPAGES ZFS_AC_KERNEL_SRC_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_SRC_VFS_IOV_ITER @@ -190,7 +189,6 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_VFS_READ_FOLIO ZFS_AC_KERNEL_VFS_MIGRATE_FOLIO ZFS_AC_KERNEL_VFS_FSYNC_2ARGS - ZFS_AC_KERNEL_VFS_DIRECT_IO ZFS_AC_KERNEL_VFS_READPAGES ZFS_AC_KERNEL_VFS_SET_PAGE_DIRTY_NOBUFFERS ZFS_AC_KERNEL_VFS_IOV_ITER diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h index 5d483685eb20..9e7afea2ab34 100644 --- a/include/os/linux/spl/sys/uio.h +++ b/include/os/linux/spl/sys/uio.h @@ -40,7 +40,7 @@ */ #define UIO_DIRECT 0x0001 /* Direct I/O request */ -#if defined(HAVE_VFS_IOV_ITER) && defined(HAVE_FAULT_IN_IOV_ITER_READABLE) +#if defined(HAVE_FAULT_IN_IOV_ITER_READABLE) #define iov_iter_fault_in_readable(a, b) fault_in_iov_iter_readable(a, b) #endif @@ -52,12 +52,9 @@ typedef enum zfs_uio_rw { } zfs_uio_rw_t; typedef enum zfs_uio_seg { - UIO_USERSPACE = 0, - UIO_SYSSPACE = 1, - UIO_BVEC = 2, -#if defined(HAVE_VFS_IOV_ITER) - UIO_ITER = 3, -#endif + UIO_SYSSPACE = 0, + UIO_BVEC = 1, + UIO_ITER = 2, } zfs_uio_seg_t; /* @@ -72,9 +69,7 @@ typedef struct zfs_uio { union { const struct iovec *uio_iov; const struct bio_vec *uio_bvec; -#if defined(HAVE_VFS_IOV_ITER) struct iov_iter *uio_iter; -#endif }; int uio_iovcnt; /* Number of iovecs */ offset_t uio_soffset; /* Starting logical offset */ @@ -129,7 +124,7 @@ zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov, unsigned long nr_segs, offset_t offset, zfs_uio_seg_t seg, ssize_t resid, size_t skip) { - ASSERT(seg == UIO_USERSPACE || seg == UIO_SYSSPACE); + ASSERT(seg 
== UIO_SYSSPACE); uio->uio_iov = iov; uio->uio_iovcnt = nr_segs; @@ -175,7 +170,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq) memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t)); } -#if defined(HAVE_VFS_IOV_ITER) static inline void zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset, ssize_t resid, size_t skip) @@ -192,7 +186,6 @@ zfs_uio_iov_iter_init(zfs_uio_t *uio, struct iov_iter *iter, offset_t offset, uio->uio_soffset = uio->uio_loffset; memset(&uio->uio_dio, 0, sizeof (zfs_uio_dio_t)); } -#endif /* HAVE_VFS_IOV_ITER */ #if defined(HAVE_ITER_IOV) #define zfs_uio_iter_iov(iter) iter_iov((iter)) diff --git a/lib/libspl/include/sys/uio.h b/lib/libspl/include/sys/uio.h index 16749fa492e5..f86be64ce7f3 100644 --- a/lib/libspl/include/sys/uio.h +++ b/lib/libspl/include/sys/uio.h @@ -57,8 +57,7 @@ typedef enum zfs_uio_rw { } zfs_uio_rw_t; typedef enum zfs_uio_seg { - UIO_USERSPACE = 0, - UIO_SYSSPACE = 1, + UIO_SYSSPACE = 0, } zfs_uio_seg_t; #elif defined(__FreeBSD__) diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c index f08415fdb2e3..ed11f8b63fbf 100644 --- a/module/os/linux/zfs/zfs_uio.c +++ b/module/os/linux/zfs/zfs_uio.c @@ -68,55 +68,13 @@ zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) size_t skip = uio->uio_skip; ulong_t cnt; + ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE); while (n && uio->uio_resid) { cnt = MIN(iov->iov_len - skip, n); - switch (uio->uio_segflg) { - case UIO_USERSPACE: - /* - * p = kernel data pointer - * iov->iov_base = user data pointer - */ - if (rw == UIO_READ) { - if (copy_to_user(iov->iov_base+skip, p, cnt)) - return (EFAULT); - } else { - unsigned long b_left = 0; - if (uio->uio_fault_disable) { - if (!zfs_access_ok(VERIFY_READ, - (iov->iov_base + skip), cnt)) { - return (EFAULT); - } - pagefault_disable(); - b_left = - __copy_from_user_inatomic(p, - (iov->iov_base + skip), cnt); - pagefault_enable(); - } else { - b_left = - 
copy_from_user(p, - (iov->iov_base + skip), cnt); - } - if (b_left > 0) { - unsigned long c_bytes = - cnt - b_left; - uio->uio_skip += c_bytes; - ASSERT3U(uio->uio_skip, <, - iov->iov_len); - uio->uio_resid -= c_bytes; - uio->uio_loffset += c_bytes; - return (EFAULT); - } - } - break; - case UIO_SYSSPACE: - if (rw == UIO_READ) - memcpy(iov->iov_base + skip, p, cnt); - else - memcpy(p, iov->iov_base + skip, cnt); - break; - default: - ASSERT(0); - } + if (rw == UIO_READ) + memcpy(iov->iov_base + skip, p, cnt); + else + memcpy(p, iov->iov_base + skip, cnt); skip += cnt; if (skip == iov->iov_len) { skip = 0; @@ -268,7 +226,6 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) return (zfs_uiomove_bvec_impl(p, n, rw, uio)); } -#if defined(HAVE_VFS_IOV_ITER) static int zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, boolean_t revert) @@ -303,17 +260,14 @@ zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, return (0); } -#endif int zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { if (uio->uio_segflg == UIO_BVEC) return (zfs_uiomove_bvec(p, n, rw, uio)); -#if defined(HAVE_VFS_IOV_ITER) else if (uio->uio_segflg == UIO_ITER) return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE)); -#endif else return (zfs_uiomove_iov(p, n, rw, uio)); } @@ -336,44 +290,14 @@ zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio) * there is never a time for these pages a fault will occur. */ return (0); -#if defined(HAVE_VFS_IOV_ITER) - } else if (uio->uio_segflg == UIO_ITER) { + } else { + ASSERT3S(uio->uio_segflg, ==, UIO_ITER); /* - * At least a Linux 4.9 kernel, iov_iter_fault_in_readable() + * On Linux 4.18 and later, iov_iter_fault_in_readable() * can be relied on to fault in user pages when referenced. 
*/ if (iov_iter_fault_in_readable(uio->uio_iter, n)) return (EFAULT); -#endif - } else { - /* Fault in all user pages */ - ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE); - const struct iovec *iov = uio->uio_iov; - int iovcnt = uio->uio_iovcnt; - size_t skip = uio->uio_skip; - uint8_t tmp; - caddr_t p; - - for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) { - ulong_t cnt = MIN(iov->iov_len - skip, n); - /* empty iov */ - if (cnt == 0) - continue; - n -= cnt; - /* touch each page in this segment. */ - p = iov->iov_base + skip; - while (cnt) { - if (copy_from_user(&tmp, p, 1)) - return (EFAULT); - ulong_t incr = MIN(cnt, PAGESIZE); - p += incr; - cnt -= incr; - } - /* touch the last byte in case it straddles a page. */ - p--; - if (copy_from_user(&tmp, p, 1)) - return (EFAULT); - } } return (0); @@ -394,10 +318,8 @@ zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes) if (uio->uio_segflg == UIO_BVEC) ret = zfs_uiomove_bvec(p, n, rw, &uio_copy); -#if defined(HAVE_VFS_IOV_ITER) else if (uio->uio_segflg == UIO_ITER) ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE); -#endif else ret = zfs_uiomove_iov(p, n, rw, &uio_copy); @@ -430,11 +352,10 @@ zfs_uioskip(zfs_uio_t *uio, size_t n) uio->uio_bvec++; uio->uio_iovcnt--; } -#if defined(HAVE_VFS_IOV_ITER) } else if (uio->uio_segflg == UIO_ITER) { iov_iter_advance(uio->uio_iter, n); -#endif } else { + ASSERT3S(uio->uio_segflg, ==, UIO_SYSSPACE); uio->uio_skip += n; while (uio->uio_iovcnt && uio->uio_skip >= uio->uio_iov->iov_len) { @@ -457,8 +378,7 @@ zfs_uio_page_aligned(zfs_uio_t *uio) { boolean_t aligned = B_TRUE; - if (uio->uio_segflg == UIO_USERSPACE || - uio->uio_segflg == UIO_SYSSPACE) { + if (uio->uio_segflg == UIO_SYSSPACE) { const struct iovec *iov = uio->uio_iov; size_t skip = uio->uio_skip; @@ -472,12 +392,10 @@ zfs_uio_page_aligned(zfs_uio_t *uio) } skip = 0; } -#if defined(HAVE_VFS_IOV_ITER) } else if (uio->uio_segflg == UIO_ITER) { unsigned long alignment = 
iov_iter_alignment(uio->uio_iter); aligned = IS_P2ALIGNED(alignment, PAGE_SIZE); -#endif } else { /* Currently not supported */ aligned = B_FALSE; @@ -578,76 +496,6 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw) uio->uio_dio.npages * sizeof (struct page *)); } -/* - * zfs_uio_iov_step() is just a modified version of the STEP function of Linux's - * iov_iter_get_pages(). - */ -static int -zfs_uio_iov_step(struct iovec v, zfs_uio_rw_t rw, zfs_uio_t *uio, - long *numpages) -{ - unsigned long addr = (unsigned long)(v.iov_base); - size_t len = v.iov_len; - unsigned long n = DIV_ROUND_UP(len, PAGE_SIZE); - - /* - * read returning FOLL_WRITE is due to the fact that we are stating - * that the kernel will have write access to the user pages. So, when a - * Direct I/O read request is issued, the kernel must write to the user - * pages. - */ - long res = get_user_pages_unlocked( - P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, - &uio->uio_dio.pages[uio->uio_dio.npages], - rw == UIO_READ ? 
FOLL_WRITE : 0); - if (res < 0) { - return (SET_ERROR(-res)); - } else if (len != (res * PAGE_SIZE)) { - return (SET_ERROR(EFAULT)); - } - - ASSERT3S(len, ==, res * PAGE_SIZE); - *numpages = res; - return (0); -} - -static int -zfs_uio_get_dio_pages_iov(zfs_uio_t *uio, zfs_uio_rw_t rw) -{ - const struct iovec *iovp = uio->uio_iov; - size_t skip = uio->uio_skip; - size_t len = uio->uio_resid - skip; - - ASSERT(uio->uio_segflg != UIO_SYSSPACE); - - for (int i = 0; i < uio->uio_iovcnt; i++) { - struct iovec iov; - long numpages = 0; - - if (iovp->iov_len == 0) { - iovp++; - skip = 0; - continue; - } - iov.iov_len = MIN(len, iovp->iov_len - skip); - iov.iov_base = iovp->iov_base + skip; - int error = zfs_uio_iov_step(iov, rw, uio, &numpages); - - if (error) - return (error); - - uio->uio_dio.npages += numpages; - len -= iov.iov_len; - skip = 0; - iovp++; - } - - ASSERT0(len); - - return (0); -} - -#if defined(HAVE_VFS_IOV_ITER) static int zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) { @@ -688,7 +536,6 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw) return (0); } -#endif /* HAVE_VFS_IOV_ITER */ /* * This function pins user pages. 
In the event that the user pages were not @@ -703,14 +550,9 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw) long npages = DIV_ROUND_UP(uio->uio_resid, PAGE_SIZE); size_t size = npages * sizeof (struct page *); - if (uio->uio_segflg == UIO_USERSPACE) { - uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP); - error = zfs_uio_get_dio_pages_iov(uio, rw); -#if defined(HAVE_VFS_IOV_ITER) - } else if (uio->uio_segflg == UIO_ITER) { + if (uio->uio_segflg == UIO_ITER) { uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP); error = zfs_uio_get_dio_pages_iov_iter(uio, rw); -#endif } else { return (SET_ERROR(EOPNOTSUPP)); } diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c index f6e014327717..57713d81c3a0 100644 --- a/module/os/linux/zfs/zpl_file.c +++ b/module/os/linux/zfs/zpl_file.c @@ -216,27 +216,6 @@ zpl_file_accessed(struct file *filp) } } -/* - * When HAVE_VFS_IOV_ITER is defined the iov_iter structure supports - * iovecs, kvevs, bvecs and pipes, plus all the required interfaces to - * manipulate the iov_iter are available. In which case the full iov_iter - * can be attached to the uio and correctly handled in the lower layers. - * Otherwise, for older kernels extract the iovec and pass it instead. - */ -static void -zpl_uio_init(zfs_uio_t *uio, struct kiocb *kiocb, struct iov_iter *to, - loff_t pos, ssize_t count, size_t skip) -{ -#if defined(HAVE_VFS_IOV_ITER) - zfs_uio_iov_iter_init(uio, to, pos, count, skip); -#else - zfs_uio_iovec_init(uio, zfs_uio_iter_iov(to), to->nr_segs, pos, - zfs_uio_iov_iter_type(to) & ITER_KVEC ? 
- UIO_SYSSPACE : UIO_USERSPACE, - count, skip); -#endif -} - static ssize_t zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) { @@ -246,7 +225,7 @@ zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to) ssize_t count = iov_iter_count(to); zfs_uio_t uio; - zpl_uio_init(&uio, kiocb, to, kiocb->ki_pos, count, 0); + zfs_uio_iov_iter_init(&uio, to, kiocb->ki_pos, count, 0); crhold(cr); cookie = spl_fstrans_mark(); @@ -296,7 +275,8 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) if (ret) return (ret); - zpl_uio_init(&uio, kiocb, from, kiocb->ki_pos, count, from->iov_offset); + zfs_uio_iov_iter_init(&uio, from, kiocb->ki_pos, count, + from->iov_offset); crhold(cr); cookie = spl_fstrans_mark(); @@ -317,34 +297,18 @@ zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from) } static ssize_t -zpl_direct_IO_impl(void) +zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter) { /* * All O_DIRECT requests should be handled by - * zpl_{iter/aio}_{write/read}(). There is no way kernel generic code - * should call the direct_IO address_space_operations function. We set - * this code path to be fatal if it is executed. + * zpl_iter_{write/read}(). There is no way kernel generic code should + * call the direct_IO address_space_operations function. We set this + * code path to be fatal if it is executed. 
*/ PANIC(0); return (0); } -#if defined(HAVE_VFS_DIRECT_IO_ITER) -static ssize_t -zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter) -{ - return (zpl_direct_IO_impl()); -} -#elif defined(HAVE_VFS_DIRECT_IO_ITER_OFFSET) -static ssize_t -zpl_direct_IO(struct kiocb *kiocb, struct iov_iter *iter, loff_t pos) -{ - return (zpl_direct_IO_impl()); -} -#else -#error "Unknown Direct I/O interface" -#endif - static loff_t zpl_llseek(struct file *filp, loff_t offset, int whence) { @@ -1104,14 +1068,12 @@ const struct file_operations zpl_file_operations = { .llseek = zpl_llseek, .read_iter = zpl_iter_read, .write_iter = zpl_iter_write, -#ifdef HAVE_VFS_IOV_ITER #ifdef HAVE_COPY_SPLICE_READ .splice_read = copy_splice_read, #else .splice_read = generic_file_splice_read, #endif .splice_write = iter_file_splice_write, -#endif .mmap = zpl_mmap, .fsync = zpl_fsync, .fallocate = zpl_fallocate,