From d0d6ba73a504d555a57a8ee6e2f729c89ce9c4f1 Mon Sep 17 00:00:00 2001 From: erwei-xilinx Date: Wed, 18 Sep 2024 14:41:27 -0700 Subject: [PATCH] Have AIECtrlPacketToDmaPass create dynamic `MemRefType` for ctrl packet stream (#1776) --- lib/Dialect/AIEX/IR/AIEXDialect.cpp | 5 +++-- lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp | 4 +++- test/dialect/AIEX/ctrl_pkt_to_dma.mlir | 12 ++++++------ test/npu-xrt/ctrl_packet_reconfig/test.cpp | 3 +-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/Dialect/AIEX/IR/AIEXDialect.cpp b/lib/Dialect/AIEX/IR/AIEXDialect.cpp index ac6e29a8ce..12b89aad32 100644 --- a/lib/Dialect/AIEX/IR/AIEXDialect.cpp +++ b/lib/Dialect/AIEX/IR/AIEXDialect.cpp @@ -347,8 +347,9 @@ LogicalResult AIEX::NpuDmaMemcpyNdOp::verify() { if (buffer.getElementTypeBitWidth() > addressGranularity) { return emitOpError("Maximum element bit width allowed is ") << addressGranularity << "bits. "; - } else if ((buffer.getNumElements() * buffer.getElementTypeBitWidth()) < - addressGranularity) { + } else if (buffer.hasStaticShape() && + (buffer.getNumElements() * buffer.getElementTypeBitWidth()) < + addressGranularity) { return emitOpError("Minimum data transfer size required is ") << addressGranularity << "bits. "; } diff --git a/lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp b/lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp index 0b76309a60..f56f1cee3e 100644 --- a/lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp +++ b/lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp @@ -76,8 +76,10 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase { auto newSeq = builder.create(loc, f.getSymNameAttr()); newSeq.getBody().push_back(new Block); + + // Using dynamic shape for ctrl pkt stream. 
auto ctrlPktMemrefType = MemRefType::get( - SmallVector<int64_t>{1024}, IntegerType::get(ctx, 32), nullptr, 0); + ShapedType::kDynamic, IntegerType::get(ctx, 32), nullptr, 0); auto newBlockArg = newSeq.getBody().addArgument(ctrlPktMemrefType, loc); builder.setInsertionPointToStart(&newSeq.getBody().front()); diff --git a/test/dialect/AIEX/ctrl_pkt_to_dma.mlir b/test/dialect/AIEX/ctrl_pkt_to_dma.mlir index 0ee76178c7..d8e8a4c44f 100644 --- a/test/dialect/AIEX/ctrl_pkt_to_dma.mlir +++ b/test/dialect/AIEX/ctrl_pkt_to_dma.mlir @@ -13,13 +13,13 @@ // transforms control packet ops to dma memcpy ops and sync ops. // CHECK-LABEL: aie.device(npu1_1col) { -// CHECK: aiex.runtime_sequence(%[[ARG0:.*]]: memref<1024xi32>) { -// CHECK: aiex.npu.dma_memcpy_nd(0, 0, %[[ARG0]][0, 0, 0, 0][1, 1, 1, 2][0, 0, 0, 1], packet = ) {id = 0 : i64, issue_token = true, metadata = @ctrlpkt_col0_mm2s_chan0} : memref<1024xi32> +// CHECK: aiex.runtime_sequence(%[[ARG0:.*]]: memref<?xi32>) { +// CHECK: aiex.npu.dma_memcpy_nd(0, 0, %[[ARG0]][0, 0, 0, 0][1, 1, 1, 2][0, 0, 0, 1], packet = ) {id = 0 : i64, issue_token = true, metadata = @ctrlpkt_col0_mm2s_chan0} : memref<?xi32> // CHECK: aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 1 : i32, row = 0 : i32, row_num = 1 : i32} aie.device(npu1_1col) { %tile_0_0 = aie.tile(0, 0) {controller_id = #aie.packet_info} - aiex.runtime_sequence(%arg0: memref<2048xi32>) { + aiex.runtime_sequence() { aiex.control_packet {address = 126976 : ui32, data = array, opcode = 0 : i32, stream_id = 0 : i32} } aie.shim_dma_allocation @ctrlpkt_col0_mm2s_chan0(MM2S, 0, 0) @@ -29,14 +29,14 @@ aie.device(npu1_1col) { // ----- // CHECK-LABEL: aie.device(npu1_1col) { -// CHECK: aiex.runtime_sequence(%[[ARG0:.*]]: memref<1024xi32>) { -// CHECK: aiex.npu.dma_memcpy_nd(0, 0, %[[ARG0]][0, 0, 0, 0][1, 1, 1, 2][0, 0, 0, 1], packet = ) {id = 0 : i64, issue_token = true, metadata = @ctrlpkt_col0_mm2s_chan0} : memref<1024xi32> +// CHECK: aiex.runtime_sequence(%[[ARG0:.*]]: 
memref<?xi32>) { +// CHECK: aiex.npu.dma_memcpy_nd(0, 0, %[[ARG0]][0, 0, 0, 0][1, 1, 1, 2][0, 0, 0, 1], packet = ) {id = 0 : i64, issue_token = true, metadata = @ctrlpkt_col0_mm2s_chan0} : memref<?xi32> // CHECK: aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 1 : i32, row = 0 : i32, row_num = 1 : i32} aie.device(npu1_1col) { %tile_0_0 = aie.tile(0, 0) %tile_0_2 = aie.tile(0, 2) {controller_id = #aie.packet_info} - aiex.runtime_sequence(%arg0: memref<2048xi32>) { + aiex.runtime_sequence() { aiex.control_packet {address = 2301952 : ui32, data = array, opcode = 0 : i32, stream_id = 0 : i32} } aie.shim_dma_allocation @ctrlpkt_col0_mm2s_chan0(MM2S, 0, 0) diff --git a/test/npu-xrt/ctrl_packet_reconfig/test.cpp b/test/npu-xrt/ctrl_packet_reconfig/test.cpp index eac1ddd2d7..0ea1283140 100644 --- a/test/npu-xrt/ctrl_packet_reconfig/test.cpp +++ b/test/npu-xrt/ctrl_packet_reconfig/test.cpp @@ -23,7 +23,6 @@ constexpr int IN_SIZE = 64 * 64; constexpr int OUT_SIZE = 64 * 64; -constexpr int CTRL_IN_SIZE = 1024; #define IN_DATATYPE int8_t #define OUT_DATATYPE int8_t @@ -89,7 +88,7 @@ int main(int argc, const char *argv[]) { XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(OUT_DATATYPE), XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4)); - auto bo_ctrlpkt = xrt::bo(device, CTRL_IN_SIZE * sizeof(int32_t), + auto bo_ctrlpkt = xrt::bo(device, ctrlPackets.size() * sizeof(int32_t), XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); IN_DATATYPE *bufInA = bo_inA.map<IN_DATATYPE *>();