From 2daa2bcad677a69ba9a38fdc9a11a4390d2016bb Mon Sep 17 00:00:00 2001
From: Minwoo Im
Date: Fri, 6 Dec 2024 13:33:37 +0900
Subject: [PATCH] nvme: add low-level api to map prp/sgl without an nvme_rq

Add low-level helpers that map PRP and SGL data as well as the list
pages holding the actual PRP and SGL entries.

The existing nvme_rq mapping helpers always use the preallocated struct
nvme_rq page.vaddr member. Some applications might want to place the
prplist or SGL segments elsewhere, such as in the CMB (Controller
Memory Buffer) rather than in host memory. In that case, the
application could update the struct nvme_rq page member, but that is
not a good idea since it is a library-internal data structure with its
own policy.

To give upper-layer applications more flexibility, this patch adds
lower-level, more generic APIs that take the prplist or segment list
location as a parameter.

The newly added public helpers are:

  * nvme_map_prp
  * nvme_mapv_prp
  * nvme_mapv_sgl

There is no functional change to the existing API.

Signed-off-by: Minwoo Im
[k.jensen: update commit message and fix tests]
Signed-off-by: Klaus Jensen
---
 include/vfn/nvme/rq.h   |   9 ++
 include/vfn/nvme/util.h |  51 +++++++++
 src/nvme/rq.c           | 226 +-----------------------------------
 src/nvme/rq_test.c      |  26 ++---
 src/nvme/util.c         | 248 ++++++++++++++++++++++++++++++++++++++++
 5 files changed, 326 insertions(+), 234 deletions(-)

diff --git a/include/vfn/nvme/rq.h b/include/vfn/nvme/rq.h
index 47b847d2..c87ff9b2 100644
--- a/include/vfn/nvme/rq.h
+++ b/include/vfn/nvme/rq.h
@@ -212,6 +212,9 @@ static inline void nvme_rq_exec(struct nvme_rq *rq, union nvme_cmd *cmd)
  *
  * Map a buffer of size @len into the command payload.
  *
+ * This helper uses the PRP list page preallocated within @rq and is
+ * equivalent to calling ``nvme_map_prp(ctrl, rq->page.vaddr, ...)``.
+ *
  * Return: ``0`` on success, ``-1`` on error and sets errno.
  */
 int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
@@ -230,6 +233,9 @@ int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *
  * allowed to be unaligned, but the entry MUST end on a page boundary. All
  * subsequent entries MUST be page aligned.
  *
+ * This helper uses the PRP list page preallocated within @rq and is
+ * equivalent to calling ``nvme_mapv_prp(ctrl, rq->page.vaddr, cmd, iov, niov)``.
+ *
  * Return: ``0`` on success, ``-1`` on error and sets errno.
  */
 int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
@@ -246,6 +252,9 @@ int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *
  *
  * Map the memory contained in @iov into the request SGL.
  *
+ * This helper uses the SGL segment list page preallocated within @rq and is
+ * equivalent to calling ``nvme_mapv_sgl(ctrl, rq->page.vaddr, cmd, iov, niov)``.
+ *
  * Return: ``0`` on success, ``-1`` on error and sets errno.
  */
 int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
diff --git a/include/vfn/nvme/util.h b/include/vfn/nvme/util.h
index 6d36876f..4abd31c2 100644
--- a/include/vfn/nvme/util.h
+++ b/include/vfn/nvme/util.h
@@ -110,4 +110,55 @@ int nvme_sync(struct nvme_ctrl *ctrl, struct nvme_sq *sq, union nvme_cmd *sqe, v
 int nvme_admin(struct nvme_ctrl *ctrl, union nvme_cmd *sqe, void *buf, size_t len,
 	       struct nvme_cqe *cqe_copy);
 
+/**
+ * nvme_map_prp - Set up the Physical Region Pages in the data pointer of the
+ *                command from a buffer that is contiguous in iova mapped
+ *                memory.
+ * @ctrl: &struct nvme_ctrl
+ * @prplist: The first PRP list page address
+ * @cmd: NVMe command prototype (&union nvme_cmd)
+ * @iova: I/O Virtual Address
+ * @len: Length of buffer
+ *
+ * Map a buffer of size @len into the command payload.
+ *
+ * Return: ``0`` on success, ``-1`` on error and sets errno.
+ */
+int nvme_map_prp(struct nvme_ctrl *ctrl, leint64_t *prplist, union nvme_cmd *cmd,
+		 uint64_t iova, size_t len);
+
+/**
+ * nvme_mapv_prp - Set up the Physical Region Pages in the data pointer of
+ *                 the command from an iovec.
+ * @ctrl: &struct nvme_ctrl
+ * @prplist: The first PRP list page address
+ * @cmd: NVMe command prototype (&union nvme_cmd)
+ * @iov: array of iovecs
+ * @niov: number of iovecs in @iov
+ *
+ * Map the memory contained in @iov into the request PRPs. The first entry is
+ * allowed to be unaligned, but the entry MUST end on a page boundary. All
+ * subsequent entries MUST be page aligned.
+ *
+ * Return: ``0`` on success, ``-1`` on error and sets errno.
+ */
+int nvme_mapv_prp(struct nvme_ctrl *ctrl, leint64_t *prplist,
+		  union nvme_cmd *cmd, struct iovec *iov, int niov);
+
+/**
+ * nvme_mapv_sgl - Set up a Scatter/Gather List in the data pointer of the
+ *                 command from an iovec.
+ * @ctrl: &struct nvme_ctrl
+ * @seglist: SGL segment list page address
+ * @cmd: NVMe command prototype (&union nvme_cmd)
+ * @iov: array of iovecs
+ * @niov: number of iovecs in @iov
+ *
+ * Map the memory contained in @iov into the request SGL.
+ *
+ * Return: ``0`` on success, ``-1`` on error and sets errno.
+ */
+int nvme_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_sgld *seglist, union nvme_cmd *cmd,
+		  struct iovec *iov, int niov);
+
 #endif /* LIBVFN_NVME_UTIL_H */
diff --git a/src/nvme/rq.c b/src/nvme/rq.c
index 9e1986dd..490ca8c9 100644
--- a/src/nvme/rq.c
+++ b/src/nvme/rq.c
@@ -35,241 +35,25 @@
 #include 
 #include 
-#include "ccan/minmax/minmax.h"
-
 #include "iommu/context.h"
 #include "types.h"
 
-static inline int __map_prp_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len,
-				  int pageshift)
+int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
+		    uint64_t iova, size_t len)
 {
-	size_t pagesize = 1 << pageshift;
-	int max_prps = 1 << (pageshift - 3);
-
-	/* number of prps required to map the buffer */
-	int prpcount = 1;
-
-	*prp1 = cpu_to_le64(iova);
-
-	/* account for what is covered with the first prp */
-	len -= min_t(size_t, len, pagesize - (iova & (pagesize - 1)));
-
-	/* any residual just adds more prps */
-	if (len)
-		prpcount += (int)ALIGN_UP(len, pagesize) >> pageshift;
-
-	if (prpcount > 1 && !ALIGNED(iova, pagesize))
-		/* align down to simplify loop below */
-		iova = ALIGN_DOWN(iova, pagesize);
-
-	if (prpcount > max_prps) {
-		errno = EINVAL;
-		return -1;
-	}
-
-	/*
-	 * Map the remaining parts of the buffer into prp2/prplist. iova will be
-	 * aligned from the above, which simplifies this.
-	 */
-	for (int i = 1; i < prpcount; i++)
-		prplist[i - 1] = cpu_to_le64(iova + ((uint64_t)i << pageshift));
-
-	/*
-	 * prpcount may be zero if the buffer length was less than the page
-	 * size, so clamp it to 1 in that case.
- */ - return clamp_t(int, prpcount, 1, prpcount); -} - -static inline int __map_prp_append(leint64_t *prplist, uint64_t iova, size_t len, int max_prps, - int pageshift) -{ - int prpcount = max_t(int, 1, (int)len >> pageshift); - size_t pagesize = 1 << pageshift; - - if (prpcount > max_prps) { - log_error("too many prps required\n"); - - errno = EINVAL; - return -1; - } - - if (!ALIGNED(iova, pagesize)) { - log_error("unaligned iova 0x%" PRIx64 "\n", iova); - - errno = EINVAL; - return -1; - } - - for (int i = 0; i < prpcount; i++) - prplist[i] = cpu_to_le64(iova + ((uint64_t)i << pageshift)); - - return prpcount; -} - -static inline void __set_prp2(leint64_t *prp2, leint64_t prplist, leint64_t prplist0, int prpcount) -{ - if (prpcount == 2) - *prp2 = prplist0; - else if (prpcount > 2) - *prp2 = prplist; - else - *prp2 = 0x0; -} - -int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova, - size_t len) -{ - int prpcount; - leint64_t *prplist = rq->page.vaddr; - int pageshift = __mps_to_pageshift(ctrl->config.mps); - - prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift); - if (prpcount < 0) { - errno = EINVAL; - return -1; - } - - __set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount); - - return 0; + return nvme_map_prp(ctrl, rq->page.vaddr, cmd, iova, len); } int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, struct iovec *iov, int niov) { - struct iommu_ctx *ctx = __iommu_ctx(ctrl); - - leint64_t *prplist = rq->page.vaddr; - size_t len = iov->iov_len; - int pageshift = __mps_to_pageshift(ctrl->config.mps); - size_t pagesize = 1 << pageshift; - int max_prps = 1 << (pageshift - 3); - int ret, prpcount; - uint64_t iova; - - if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) { - errno = EFAULT; - return -1; - } - - /* map the first segment */ - prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift); - if (prpcount < 0) - goto invalid; - - /* - * At this point, one of three conditions must hold: - * - * a) a single prp entry was set up by __map_first, or - * b) the iovec only has a single entry, or - * c) the first buffer ends on a page size boundary - * - * If none holds, the buffer(s) within the iovec cannot be mapped given - * the PRP alignment requirements. 
- */ - if (!(prpcount == 1 || niov == 1 || ALIGNED(iova + len, pagesize))) { - log_error("iov[0].iov_base/len invalid\n"); - - goto invalid; - } - - /* map remaining iovec entries; these must be page size aligned */ - for (int i = 1; i < niov; i++) { - if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) { - errno = EFAULT; - return -1; - } - - len = iov[i].iov_len; - - /* all entries but the last must have a page size aligned len */ - if (i < niov - 1 && !ALIGNED(len, pagesize)) { - log_error("unaligned iov[%u].len (%zu)\n", i, len); - - goto invalid; - } - - ret = __map_prp_append(&prplist[prpcount - 1], iova, len, max_prps - prpcount, - pageshift); - if (ret < 0) - goto invalid; - - prpcount += ret; - } - - __set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount); - - return 0; - -invalid: - errno = EINVAL; - return -1; -} - -static inline void __sgl_data(struct nvme_sgld *sgld, uint64_t iova, size_t len) -{ - sgld->addr = cpu_to_le64(iova); - sgld->len = cpu_to_le32((uint32_t)len); - - sgld->type = NVME_SGLD_TYPE_DATA_BLOCK << 4; -} - -static inline void __sgl_segment(struct nvme_sgld *sgld, uint64_t iova, int n) -{ - sgld->addr = cpu_to_le64(iova); - sgld->len = cpu_to_le32(n << 4); - - sgld->type = NVME_SGLD_TYPE_LAST_SEGMENT << 4; + return nvme_mapv_prp(ctrl, rq->page.vaddr, cmd, iov, niov); } int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, struct iovec *iov, int niov) { - struct nvme_sgld *seg = rq->page.vaddr; - struct iommu_ctx *ctx = __iommu_ctx(ctrl); - - int pageshift = __mps_to_pageshift(ctrl->config.mps); - int max_sglds = 1 << (pageshift - 4); - int dword_align = ctrl->flags & NVME_CTRL_F_SGLS_DWORD_ALIGNMENT; - - uint64_t iova; - - if (niov == 1) { - if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) { - errno = EFAULT; - return -1; - } - - __sgl_data(&cmd->dptr.sgl, iova, iov->iov_len); - - return 0; - } - - if (niov > max_sglds) { - errno = EINVAL; - return -1; - } - - __sgl_segment(&cmd->dptr.sgl, rq->page.iova, niov); - - for (int i = 0; i < niov; i++) { - if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) { - errno = EFAULT; - return -1; - } - - if (dword_align && (iova & 0x3)) { - errno = EINVAL; - return -1; - } - - __sgl_data(&seg[i], iova, iov[i].iov_len); - } - - cmd->flags |= NVME_FIELD_SET(NVME_CMD_FLAGS_PSDT_SGL_MPTR_CONTIG, CMD_FLAGS_PSDT); - - return 0; + return nvme_mapv_sgl(ctrl, rq->page.vaddr, cmd, iov, niov); } int nvme_rq_mapv(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, diff --git a/src/nvme/rq_test.c b/src/nvme/rq_test.c index 79c691aa..7c73d77d 100644 --- a/src/nvme/rq_test.c +++ b/src/nvme/rq_test.c @@ -56,7 +56,7 @@ int main(void) assert(pgmap((void **)&rq.page.vaddr, __VFN_PAGESIZE) > 0); - rq.page.iova = 0x8000000; + rq.page.iova = (uint64_t)rq.page.vaddr; prplist = rq.page.vaddr; sglds = rq.page.vaddr; @@ -93,7 +93,7 @@ int main(void) ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000000, 0x2010) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ -102,7 +102,7 @@ int main(void) ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000000, 0x3000) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ 
-111,7 +111,7 @@ int main(void) ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000000, 0x3018) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ -148,7 +148,7 @@ int main(void) ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1001000 - 4, 0x1008) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1001000 - 4); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ -164,7 +164,7 @@ int main(void) ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000004, 0x2000) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ -173,7 +173,7 @@ int main(void) ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000004, 0x2010) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ -189,7 +189,7 @@ int main(void) ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000004, 0x3000) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); ok1(le64_to_cpu(prplist[2]) == 0x1003000); @@ -199,7 +199,7 @@ int main(void) ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000004, 0x3018) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); ok1(le64_to_cpu(prplist[2]) == 0x1003000); @@ -234,7 +234,7 @@ int main(void) ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 1) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ -255,7 +255,7 @@ int main(void) ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 3) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ -266,7 +266,7 @@ int main(void) ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 3) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); @@ -303,7 +303,7 @@ int main(void) ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 3) == 0); ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004); - ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000); + ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova); ok1(le64_to_cpu(prplist[0]) == 0x1001000); ok1(le64_to_cpu(prplist[1]) == 0x1002000); diff --git a/src/nvme/util.c b/src/nvme/util.c index a509ceb7..f761cf45 100644 --- a/src/nvme/util.c +++ b/src/nvme/util.c @@ -34,6 +34,7 @@ #include #include +#include "ccan/minmax/minmax.h" #include 
"types.h" #include "crc64table.h" @@ -141,3 +142,250 @@ int nvme_admin(struct nvme_ctrl *ctrl, union nvme_cmd *sqe, void *buf, size_t le { return nvme_sync(ctrl, ctrl->adminq.sq, sqe, buf, len, cqe_copy); } + +static inline int __map_prp_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len, + int pageshift) +{ + size_t pagesize = 1 << pageshift; + int max_prps = 1 << (pageshift - 3); + + /* number of prps required to map the buffer */ + int prpcount = 1; + + *prp1 = cpu_to_le64(iova); + + /* account for what is covered with the first prp */ + len -= min_t(size_t, len, pagesize - (iova & (pagesize - 1))); + + /* any residual just adds more prps */ + if (len) + prpcount += (int)ALIGN_UP(len, pagesize) >> pageshift; + + if (prpcount > 1 && !ALIGNED(iova, pagesize)) + /* align down to simplify loop below */ + iova = ALIGN_DOWN(iova, pagesize); + + if (prpcount > max_prps) { + errno = EINVAL; + return -1; + } + + /* + * Map the remaining parts of the buffer into prp2/prplist. iova will be + * aligned from the above, which simplifies this. + */ + for (int i = 1; i < prpcount; i++) + prplist[i - 1] = cpu_to_le64(iova + ((uint64_t)i << pageshift)); + + /* + * prpcount may be zero if the buffer length was less than the page + * size, so clamp it to 1 in that case. + */ + return clamp_t(int, prpcount, 1, prpcount); +} + +static inline int __map_prp_append(leint64_t *prplist, uint64_t iova, size_t len, int max_prps, + int pageshift) +{ + int prpcount = max_t(int, 1, (int)len >> pageshift); + size_t pagesize = 1 << pageshift; + + if (prpcount > max_prps) { + log_error("too many prps required\n"); + + errno = EINVAL; + return -1; + } + + if (!ALIGNED(iova, pagesize)) { + log_error("unaligned iova 0x%" PRIx64 "\n", iova); + + errno = EINVAL; + return -1; + } + + for (int i = 0; i < prpcount; i++) + prplist[i] = cpu_to_le64(iova + ((uint64_t)i << pageshift)); + + return prpcount; +} + +static inline void __set_prp2(leint64_t *prp2, leint64_t prplist, leint64_t prplist0, int prpcount) +{ + if (prpcount == 2) + *prp2 = prplist0; + else if (prpcount > 2) + *prp2 = prplist; + else + *prp2 = 0x0; +} + +int nvme_map_prp(struct nvme_ctrl *ctrl, leint64_t *prplist, union nvme_cmd *cmd, + uint64_t iova, size_t len) +{ + struct iommu_ctx *ctx = __iommu_ctx(ctrl); + int prpcount; + int pageshift = __mps_to_pageshift(ctrl->config.mps); + uint64_t prplist_iova; + + if (!iommu_translate_vaddr(ctx, prplist, &prplist_iova)) { + errno = EFAULT; + return -1; + } + + prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift); + if (prpcount < 0) { + errno = EINVAL; + return -1; + } + + __set_prp2(&cmd->dptr.prp2, cpu_to_le64(prplist_iova), prplist[0], prpcount); + + return 0; +} + +int nvme_mapv_prp(struct nvme_ctrl *ctrl, leint64_t *prplist, + union nvme_cmd *cmd, struct iovec *iov, int niov) +{ + struct iommu_ctx *ctx = __iommu_ctx(ctrl); + + size_t len = iov->iov_len; + int pageshift = __mps_to_pageshift(ctrl->config.mps); + size_t pagesize = 1 << pageshift; + int max_prps = 1 << (pageshift - 3); + int ret, prpcount; + uint64_t iova, prplist_iova; + + if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) { + errno = EFAULT; + return -1; + } + + if (!iommu_translate_vaddr(ctx, prplist, &prplist_iova)) { + errno = EFAULT; + return -1; + } + + /* map the first segment */ + prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift); + if (prpcount < 0) + goto invalid; + + /* + * At this point, one of three conditions must hold: + * + * a) a single prp entry was set up by 
__map_first, or + * b) the iovec only has a single entry, or + * c) the first buffer ends on a page size boundary + * + * If none holds, the buffer(s) within the iovec cannot be mapped given + * the PRP alignment requirements. + */ + if (!(prpcount == 1 || niov == 1 || ALIGNED(iova + len, pagesize))) { + log_error("iov[0].iov_base/len invalid\n"); + + goto invalid; + } + + /* map remaining iovec entries; these must be page size aligned */ + for (int i = 1; i < niov; i++) { + if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) { + errno = EFAULT; + return -1; + } + + len = iov[i].iov_len; + + /* all entries but the last must have a page size aligned len */ + if (i < niov - 1 && !ALIGNED(len, pagesize)) { + log_error("unaligned iov[%u].len (%zu)\n", i, len); + + goto invalid; + } + + ret = __map_prp_append(&prplist[prpcount - 1], iova, len, max_prps - prpcount, + pageshift); + if (ret < 0) + goto invalid; + + prpcount += ret; + } + + __set_prp2(&cmd->dptr.prp2, cpu_to_le64(prplist_iova), prplist[0], prpcount); + + return 0; + +invalid: + errno = EINVAL; + return -1; +} + +static inline void __sgl_data(struct nvme_sgld *sgld, uint64_t iova, size_t len) +{ + sgld->addr = cpu_to_le64(iova); + sgld->len = cpu_to_le32((uint32_t)len); + + sgld->type = NVME_SGLD_TYPE_DATA_BLOCK << 4; +} + +static inline void __sgl_segment(struct nvme_sgld *sgld, uint64_t iova, int n) +{ + sgld->addr = cpu_to_le64(iova); + sgld->len = cpu_to_le32(n << 4); + + sgld->type = NVME_SGLD_TYPE_LAST_SEGMENT << 4; +} + +int nvme_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_sgld *seg, union nvme_cmd *cmd, + struct iovec *iov, int niov) +{ + struct iommu_ctx *ctx = __iommu_ctx(ctrl); + + int pageshift = __mps_to_pageshift(ctrl->config.mps); + int max_sglds = 1 << (pageshift - 4); + int dword_align = ctrl->flags & NVME_CTRL_F_SGLS_DWORD_ALIGNMENT; + + uint64_t iova; + uint64_t seg_iova; + + if (niov == 1) { + if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) { + errno = EFAULT; + return -1; + } + + __sgl_data(&cmd->dptr.sgl, iova, iov->iov_len); + + return 0; + } + + if (niov > max_sglds) { + errno = EINVAL; + return -1; + } + + if (!iommu_translate_vaddr(ctx, seg, &seg_iova)) { + errno = EFAULT; + return -1; + } + + __sgl_segment(&cmd->dptr.sgl, seg_iova, niov); + + for (int i = 0; i < niov; i++) { + if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) { + errno = EFAULT; + return -1; + } + + if (dword_align && (iova & 0x3)) { + errno = EINVAL; + return -1; + } + + __sgl_data(&seg[i], iova, iov[i].iov_len); + } + + cmd->flags |= NVME_FIELD_SET(NVME_CMD_FLAGS_PSDT_SGL_MPTR_CONTIG, CMD_FLAGS_PSDT); + + return 0; +}
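
Usage sketch (illustrative, not part of the patch): the snippet below shows how an application might drive the new nvme_map_prp() helper with a PRP list page it placed itself, which is the CMB use case motivating the change. Everything except nvme_map_prp() and the libvfn types is an assumption: setup_read_dptr(), buf_iova, len and prplist are illustrative names, and both the data buffer and the prplist page are assumed to already be IOVA-mapped, since nvme_map_prp() translates the prplist pointer internally.

#include <vfn/nvme.h>

/*
 * Illustrative only: fill cmd->dptr.prp1/prp2 for a transfer of @len bytes at
 * @buf_iova, writing any additional PRP entries into the caller-provided
 * @prplist page (ordinary host memory or, e.g., a mapped CMB region).
 */
static int setup_read_dptr(struct nvme_ctrl *ctrl, union nvme_cmd *cmd,
			   leint64_t *prplist, uint64_t buf_iova, size_t len)
{
	if (nvme_map_prp(ctrl, prplist, cmd, buf_iova, len))
		return -1;

	/* opcode, nsid, slba, etc. are filled in by the caller as usual */
	return 0;
}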
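
A similar sketch (also illustrative, not part of the patch) for nvme_mapv_sgl() with a caller-provided segment list; setup_sgl_dptr(), seglist, iov and niov are assumed names. Per the implementation above, a single-entry iovec is described inline in the command and @seglist is left untouched, while for more entries @seglist must be IOVA-mapped and large enough to hold @niov descriptors.

#include <sys/uio.h>

#include <vfn/nvme.h>

/*
 * Illustrative only: build the command data pointer from @iov using a
 * caller-provided SGL segment list page.
 */
static int setup_sgl_dptr(struct nvme_ctrl *ctrl, union nvme_cmd *cmd,
			  struct nvme_sgld *seglist, struct iovec *iov, int niov)
{
	return nvme_mapv_sgl(ctrl, seglist, cmd, iov, niov);
}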