diff --git a/Bender.lock b/Bender.lock index eca72d58..b08779bb 100644 --- a/Bender.lock +++ b/Bender.lock @@ -15,8 +15,8 @@ packages: - apb - register_interface axi: - revision: bfee21757bf090ec8e358456314b0b0fd3c90809 - version: 0.39.0 + revision: fccffb5953ec8564218ba05e20adbedec845e014 + version: 0.39.1 source: Git: https://github.com/pulp-platform/axi.git dependencies: @@ -108,8 +108,8 @@ packages: dependencies: - common_cells idma: - revision: 437ffa9dac5dea0daccfd3e8ae604d4f6ae2cdf1 - version: null + revision: 2c64e0773fab5a54757646715485fcdf3432c7c1 + version: 0.5.0 source: Git: https://github.com/pulp-platform/iDMA.git dependencies: diff --git a/Bender.yml b/Bender.yml index b977a6b7..c9a5c7a4 100644 --- a/Bender.yml +++ b/Bender.yml @@ -21,7 +21,7 @@ dependencies: common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.29.0 } common_verification: { git: "https://github.com/pulp-platform/common_verification.git", version: 0.2.0 } cva6: { git: "https://github.com/pulp-platform/cva6.git", rev: pulp-v0.4.3 } - iDMA: { git: "https://github.com/pulp-platform/iDMA.git", rev: 437ffa9 } # TODO: master commit; use next release once out + iDMA: { git: "https://github.com/pulp-platform/iDMA.git", version: 0.5.0 } opentitan_peripherals: { git: "https://github.com/pulp-platform/opentitan_peripherals.git", version: 0.4.0 } register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.1 } riscv-dbg: { git: "https://github.com/pulp-platform/riscv-dbg.git", version: 0.8.0 } diff --git a/cheshire.mk b/cheshire.mk index 4f3dffa7..afbfcc48 100644 --- a/cheshire.mk +++ b/cheshire.mk @@ -21,6 +21,7 @@ CHS_LLC_DIR := $(shell $(BENDER) path axi_llc) OTPROOT := $(shell $(BENDER) path opentitan_peripherals) CLINTROOT := $(shell $(BENDER) path clint) AXI_VGA_ROOT := $(shell $(BENDER) path axi_vga) +IDMA_ROOT := $(shell $(BENDER) path idma) REGTOOL ?= $(CHS_REG_DIR)/vendor/lowrisc_opentitan/util/regtool.py diff --git 
a/docs/um/arch.md b/docs/um/arch.md index 5c92f26b..83829bbc 100644 --- a/docs/um/arch.md +++ b/docs/um/arch.md @@ -236,13 +236,18 @@ The [Serial Link](https://github.com/pulp-platform/serial_link) is a fully digit ### DMA engine -The [iDMA engine](https://github.com/pulp-platform/iDMA) enables high-throughput asynchronous transfers between any two subordinate address ranges in the system. It exposes the following parameters: +The [iDMA engine](https://github.com/pulp-platform/iDMA) enables high-throughput asynchronous transfers between any two subordinate address ranges in the system. If enabled, the engine supports up to two-dimensional transfers directly in hardware. It exposes the following parameters: | Parameter | Type / Range | Description | | ---------------------------- | ------------ | ------------------------------------------------- | | `DmaConfMax(Read|Write)Txns` | `dw_bt` | Max. number of outstanding requests to DMA config | | `DmaConfAmoNumCuts` | `aw_bt` | Number of timing cuts inside config AMO filter | | `DmaConfAmoPostCut` | `bit` | Whether to insert a cut after config AMO filter | +| `DmaConfEnableTwoD` | `bit` | Whether the 2D hardware extension is present | +| `DmaNumAxInFlight` | `dw_bt` | Number of outstanding transfers the DMA launches | +| `DmaMemSysDepth` | `dw_bt` | The *approximate* depth of the memory system | +| `DmaJobFifoDepth` | `aw_bt` | The depth of the job FIFO | +| `DmaRAWCouplingAvail` | `bit` | Whether the R-AW coupling feature is available | ### I2C, SPI, GPIOs diff --git a/hw/cheshire_pkg.sv b/hw/cheshire_pkg.sv index 1d3b09df..028fdb2e 100644 --- a/hw/cheshire_pkg.sv +++ b/hw/cheshire_pkg.sv @@ -172,6 +172,11 @@ package cheshire_pkg; dw_bt DmaConfMaxWriteTxns; aw_bt DmaConfAmoNumCuts; bit DmaConfAmoPostCut; + bit DmaConfEnableTwoD; + dw_bt DmaNumAxInFlight; + dw_bt DmaMemSysDepth; + aw_bt DmaJobFifoDepth; + bit DmaRAWCouplingAvail; // Parameters for GPIO bit GpioInputSyncs; // Parameters for AXI RT @@ -514,8 +519,8 @@ 
package cheshire_pkg; AxiDataWidth : 64, AxiUserWidth : 2, // AMO(2) AxiMstIdWidth : 2, - AxiMaxMstTrans : 8, - AxiMaxSlvTrans : 8, + AxiMaxMstTrans : 24, + AxiMaxSlvTrans : 24, AxiUserAmoMsb : 1, // Convention: lower AMO bits for cores, MSB for serial link AxiUserAmoLsb : 0, // Convention: lower AMO bits for cores, MSB for serial link AxiUserDefault : 0, @@ -550,8 +555,8 @@ package cheshire_pkg; LlcSetAssoc : 8, LlcNumLines : 256, LlcNumBlocks : 8, - LlcMaxReadTxns : 8, - LlcMaxWriteTxns : 8, + LlcMaxReadTxns : 16, + LlcMaxWriteTxns : 16, LlcAmoNumCuts : 1, LlcAmoPostCut : 1, LlcOutConnect : 1, @@ -577,6 +582,11 @@ package cheshire_pkg; DmaConfMaxWriteTxns : 4, DmaConfAmoNumCuts : 1, DmaConfAmoPostCut : 1, + DmaConfEnableTwoD : 1, + DmaNumAxInFlight : 16, + DmaMemSysDepth : 8, + DmaJobFifoDepth : 2, + DmaRAWCouplingAvail : 1, // GPIOs GpioInputSyncs : 1, // AXI RT diff --git a/hw/cheshire_soc.sv b/hw/cheshire_soc.sv index 31812a53..b631db66 100644 --- a/hw/cheshire_soc.sv +++ b/hw/cheshire_soc.sv @@ -1460,15 +1460,20 @@ module cheshire_soc import cheshire_pkg::*; #( end dma_core_wrap #( - .AxiAddrWidth ( Cfg.AddrWidth ), - .AxiDataWidth ( Cfg.AxiDataWidth ), - .AxiIdWidth ( Cfg.AxiMstIdWidth ), - .AxiUserWidth ( Cfg.AxiUserWidth ), - .AxiSlvIdWidth ( AxiSlvIdWidth ), - .axi_mst_req_t ( axi_mst_req_t ), - .axi_mst_rsp_t ( axi_mst_rsp_t ), - .axi_slv_req_t ( axi_slv_req_t ), - .axi_slv_rsp_t ( axi_slv_rsp_t ) + .AxiAddrWidth ( Cfg.AddrWidth ), + .AxiDataWidth ( Cfg.AxiDataWidth ), + .AxiIdWidth ( Cfg.AxiMstIdWidth ), + .AxiUserWidth ( Cfg.AxiUserWidth ), + .AxiSlvIdWidth ( AxiSlvIdWidth ), + .NumAxInFlight ( Cfg.DmaNumAxInFlight ), + .MemSysDepth ( Cfg.DmaMemSysDepth ), + .JobFifoDepth ( Cfg.DmaJobFifoDepth ), + .RAWCouplingAvail ( Cfg.DmaRAWCouplingAvail ), + .IsTwoD ( Cfg.DmaConfEnableTwoD ), + .axi_mst_req_t ( axi_mst_req_t ), + .axi_mst_rsp_t ( axi_mst_rsp_t ), + .axi_slv_req_t ( axi_slv_req_t ), + .axi_slv_rsp_t ( axi_slv_rsp_t ) ) i_dma ( .clk_i, .rst_ni, diff 
--git a/sw/include/dif/dma.h b/sw/include/dif/dma.h new file mode 100644 index 00000000..f48b0a9f --- /dev/null +++ b/sw/include/dif/dma.h @@ -0,0 +1,130 @@ +// Copyright 2022 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Alessandro Ottaviano +// Thomas Benz + +#include <stdint.h> +#include "regs/idma.h" +#include "params.h" + +#define DMA_SRC_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_SRC_ADDR_REG_OFFSET) +#define DMA_DST_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_DST_ADDR_REG_OFFSET) +#define DMA_NUMBYTES_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_NUM_BYTES_REG_OFFSET) +#define DMA_CONF_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_CONF_REG_OFFSET) +#define DMA_STATUS_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_STATUS_REG_OFFSET) +#define DMA_NEXTID_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_NEXT_ID_REG_OFFSET) +#define DMA_DONE_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_DONE_REG_OFFSET) +#define DMA_SRC_STRIDE_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_STRIDE_SRC_REG_OFFSET) +#define DMA_DST_STRIDE_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_STRIDE_DST_REG_OFFSET) +#define DMA_NUM_REPS_ADDR(BASE) ((void *)BASE + IDMA_REG64_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET) + +#define DMA_CONF_DECOUPLE 0 +#define DMA_CONF_DEBURST 0 +#define DMA_CONF_SERIALIZE 0 + +#define X(NAME, BASE_ADDR) \ + extern volatile uint64_t *NAME##_dma_src_ptr(void); \ + extern volatile uint64_t *NAME##_dma_dst_ptr(void); \ + extern volatile uint64_t *NAME##_dma_num_bytes_ptr(void); \ + extern volatile uint64_t *NAME##_dma_conf_ptr(void); \ + extern volatile uint64_t *NAME##_dma_status_ptr(void); \ + extern volatile uint64_t *NAME##_dma_nextid_ptr(void); \ + extern volatile uint64_t *NAME##_dma_done_ptr(void); \ + extern volatile uint64_t *NAME##_dma_src_stride_ptr(void); \ + extern volatile uint64_t *NAME##_dma_dst_stride_ptr(void); 
\ + extern volatile uint64_t *NAME##_dma_num_reps_ptr(void); \ +\ + extern uint64_t NAME##_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size); \ + extern void NAME##_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size); \ + extern uint64_t NAME##_dma_2d_memcpy(uint64_t dst, uint64_t src, uint64_t size, \ + uint64_t dst_stride, uint64_t src_stride, \ + uint64_t num_reps); \ + extern void NAME##_dma_2d_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size, \ + uint64_t dst_stride, uint64_t src_stride, \ + uint64_t num_reps); \ +\ + inline volatile uint64_t *NAME##_dma_src_ptr(void) { \ + return (volatile uint64_t *)DMA_SRC_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_dst_ptr(void) { \ + return (volatile uint64_t *)DMA_DST_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_num_bytes_ptr(void) { \ + return (volatile uint64_t *)DMA_NUMBYTES_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_conf_ptr(void) { \ + return (volatile uint64_t *)DMA_CONF_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_status_ptr(void) { \ + return (volatile uint64_t *)DMA_STATUS_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_nextid_ptr(void) { \ + return (volatile uint64_t *)DMA_NEXTID_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_done_ptr(void) { \ + return (volatile uint64_t *)DMA_DONE_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_src_stride_ptr(void) { \ + return (volatile uint64_t *)DMA_SRC_STRIDE_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_dst_stride_ptr(void) { \ + return (volatile uint64_t *)DMA_DST_STRIDE_ADDR(BASE_ADDR); \ + } \ + inline volatile uint64_t *NAME##_dma_num_reps_ptr(void) { \ + return (volatile uint64_t *)DMA_NUM_REPS_ADDR(BASE_ADDR); \ + } \ +\ + inline uint64_t NAME##_dma_memcpy(uint64_t dst, uint64_t src, uint64_t size) { \ + *(NAME##_dma_src_ptr()) = (uint64_t)src; \ + *(NAME##_dma_dst_ptr()) = (uint64_t)dst; \ + 
*(NAME##_dma_num_bytes_ptr()) = size; \ + *(NAME##_dma_num_reps_ptr()) = 0; \ + *(NAME##_dma_conf_ptr()) = \ + (DMA_CONF_DECOUPLE << IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT) | \ + (DMA_CONF_DEBURST << IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT) | \ + (DMA_CONF_SERIALIZE << IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT); \ + return *(NAME##_dma_nextid_ptr()); \ + } \ +\ + inline void NAME##_dma_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size) { \ + volatile uint64_t tf_id = NAME##_dma_memcpy(dst, src, size); \ + while (*(NAME##_dma_done_ptr()) != tf_id) { \ + asm volatile("nop"); \ + } \ + } \ +\ + inline uint64_t NAME##_dma_2d_memcpy(uint64_t dst, uint64_t src, uint64_t size, \ + uint64_t dst_stride, uint64_t src_stride, \ + uint64_t num_reps) { \ + *(NAME##_dma_src_ptr()) = (uint64_t)src; \ + *(NAME##_dma_dst_ptr()) = (uint64_t)dst; \ + *(NAME##_dma_num_bytes_ptr()) = size; \ + *(NAME##_dma_conf_ptr()) = \ + (DMA_CONF_DECOUPLE << IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT) | \ + (DMA_CONF_DEBURST << IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT) | \ + (DMA_CONF_SERIALIZE << IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT); \ + *(NAME##_dma_src_stride_ptr()) = src_stride; \ + *(NAME##_dma_dst_stride_ptr()) = dst_stride; \ + *(NAME##_dma_num_reps_ptr()) = num_reps; \ + return *(NAME##_dma_nextid_ptr()); \ + } \ +\ + inline void NAME##_dma_2d_blk_memcpy(uint64_t dst, uint64_t src, uint64_t size, \ + uint64_t dst_stride, uint64_t src_stride, \ + uint64_t num_reps) { \ + volatile uint64_t tf_id = \ + NAME##_dma_2d_memcpy(dst, src, size, dst_stride, src_stride, num_reps); \ + while (*(NAME##_dma_done_ptr()) != tf_id) { \ + asm volatile("nop"); \ + } \ + } \ +\ + inline uint64_t NAME##_dma_get_status(void) { \ + return *(NAME##_dma_status_ptr()); \ + } + +X(sys, &__base_dma); + +#undef X diff --git a/sw/include/regs/idma.h b/sw/include/regs/idma.h new file mode 100644 index 00000000..7aa43052 --- /dev/null +++ b/sw/include/regs/idma.h @@ -0,0 +1,57 @@ +// Generated register defines for 
idma_reg64_2d_frontend + +// Copyright information found in source file: +// Copyright 2022 ETH Zurich and University of Bologna. + +// Licensing information found in source file: +// Licensed under Solderpad Hardware License, Version 0.51 +// SPDX-License-Identifier: SHL-0.51 + +#ifndef _IDMA_REG64_2D_FRONTEND_REG_DEFS_ +#define _IDMA_REG64_2D_FRONTEND_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +// Register width +#define IDMA_REG64_2D_FRONTEND_PARAM_REG_WIDTH 64 + +// Source Address +#define IDMA_REG64_2D_FRONTEND_SRC_ADDR_REG_OFFSET 0x0 + +// Destination Address +#define IDMA_REG64_2D_FRONTEND_DST_ADDR_REG_OFFSET 0x8 + +// Number of bytes +#define IDMA_REG64_2D_FRONTEND_NUM_BYTES_REG_OFFSET 0x10 + +// Configuration Register for DMA settings +#define IDMA_REG64_2D_FRONTEND_CONF_REG_OFFSET 0x18 +#define IDMA_REG64_2D_FRONTEND_CONF_DECOUPLE_BIT 0 +#define IDMA_REG64_2D_FRONTEND_CONF_DEBURST_BIT 1 +#define IDMA_REG64_2D_FRONTEND_CONF_SERIALIZE_BIT 2 + +// DMA Status +#define IDMA_REG64_2D_FRONTEND_STATUS_REG_OFFSET 0x20 +#define IDMA_REG64_2D_FRONTEND_STATUS_BUSY_BIT 0 + +// Next ID, launches transfer, returns 0 if transfer not set up properly. +#define IDMA_REG64_2D_FRONTEND_NEXT_ID_REG_OFFSET 0x28 + +// Get ID of finished transactions. 
+#define IDMA_REG64_2D_FRONTEND_DONE_REG_OFFSET 0x30 + +// Source Stride +#define IDMA_REG64_2D_FRONTEND_STRIDE_SRC_REG_OFFSET 0x38 + +// Destination Stride +#define IDMA_REG64_2D_FRONTEND_STRIDE_DST_REG_OFFSET 0x40 + +// Number of 2D repetitions +#define IDMA_REG64_2D_FRONTEND_NUM_REPETITIONS_REG_OFFSET 0x48 + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _IDMA_REG64_2D_FRONTEND_REG_DEFS_ +// End generated register defines for idma_reg64_2d_frontend \ No newline at end of file diff --git a/sw/sw.mk b/sw/sw.mk index aa708bb7..07ed3637 100644 --- a/sw/sw.mk +++ b/sw/sw.mk @@ -77,6 +77,7 @@ endef $(eval $(call chs_sw_gen_hdr_rule,clint,$(CLINTROOT)/src/clint.hjson $(CLINTROOT)/.generated)) $(eval $(call chs_sw_gen_hdr_rule,serial_link,$(CHS_ROOT)/hw/serial_link.hjson $(CHS_SLINK_DIR)/.generated)) $(eval $(call chs_sw_gen_hdr_rule,axi_vga,$(AXI_VGA_ROOT)/data/axi_vga.hjson $(AXI_VGA_ROOT)/.generated)) +$(eval $(call chs_sw_gen_hdr_rule,idma,$(IDMA_ROOT)/src/frontends/register_64bit_2d/idma_reg64_2d_frontend.hjson)) $(eval $(call chs_sw_gen_hdr_rule,axi_llc,$(CHS_LLC_DIR)/data/axi_llc_regs.hjson)) $(eval $(call chs_sw_gen_hdr_rule,cheshire,$(CHS_ROOT)/hw/regs/cheshire_regs.hjson)) $(eval $(call chs_sw_gen_hdr_rule,axi_rt,$(CHS_ROOT)/hw/regs/axi_rt_regs.hjson)) diff --git a/target/xilinx/src/cheshire_top_xilinx.sv b/target/xilinx/src/cheshire_top_xilinx.sv index f9763f54..188e0ce3 100644 --- a/target/xilinx/src/cheshire_top_xilinx.sv +++ b/target/xilinx/src/cheshire_top_xilinx.sv @@ -143,6 +143,11 @@ module cheshire_top_xilinx DmaConfMaxWriteTxns : 4, DmaConfAmoNumCuts : 1, DmaConfAmoPostCut : 1, + DmaConfEnableTwoD : 1, + DmaNumAxInFlight : 16, + DmaMemSysDepth : 8, + DmaJobFifoDepth : 2, + DmaRAWCouplingAvail : 1, // GPIOs GpioInputSyncs : 1, // All non-set values should be zero