diff --git a/include/aie/Dialect/AIE/Transforms/AIEGenerateColumnControlOverlay.h b/include/aie/Dialect/AIE/Transforms/AIEGenerateColumnControlOverlay.h index 47c3f5620f..3af5a497f0 100644 --- a/include/aie/Dialect/AIE/Transforms/AIEGenerateColumnControlOverlay.h +++ b/include/aie/Dialect/AIE/Transforms/AIEGenerateColumnControlOverlay.h @@ -48,4 +48,4 @@ DenseMap getTileToControllerIdMap(bool clColumnWiseUniqueIDs, const AIETargetModel &targetModel); -#endif \ No newline at end of file +#endif diff --git a/include/aie/Dialect/AIEX/IR/AIEX.td b/include/aie/Dialect/AIEX/IR/AIEX.td index 784fc16710..3f85a7e625 100644 --- a/include/aie/Dialect/AIEX/IR/AIEX.td +++ b/include/aie/Dialect/AIEX/IR/AIEX.td @@ -806,6 +806,10 @@ def AIE_NpuControlPacketOp: AIEX_Op<"control_packet", []> { The control_packet operation represents a low-level AIE control packet header and payload. }]; + let extraClassDeclaration = [{ + uint32_t getRowFromAddr(); + uint32_t getColumnFromAddr(); + }]; } // NPU Bd Write operation diff --git a/include/aie/Dialect/AIEX/Transforms/AIEXPasses.h b/include/aie/Dialect/AIEX/Transforms/AIEXPasses.h index 0c82e48b8e..14d7e87a5a 100644 --- a/include/aie/Dialect/AIEX/Transforms/AIEXPasses.h +++ b/include/aie/Dialect/AIEX/Transforms/AIEXPasses.h @@ -38,6 +38,10 @@ std::unique_ptr> createAIEDMATasksToNPUPass(); std::unique_ptr> createAIESubstituteShimDMAAllocationsPass(); +std::unique_ptr> +createAIECtrlPacketToDmaPass(); +std::unique_ptr> +createAIECtrlPacketInferTilesPass(); /// Generate the code for registering passes. #define GEN_PASS_REGISTRATION diff --git a/include/aie/Dialect/AIEX/Transforms/AIEXPasses.td b/include/aie/Dialect/AIEX/Transforms/AIEXPasses.td index 4722821b85..11c2dd8005 100644 --- a/include/aie/Dialect/AIEX/Transforms/AIEXPasses.td +++ b/include/aie/Dialect/AIEX/Transforms/AIEXPasses.td @@ -190,4 +190,24 @@ def AIESubstituteShimDMAAllocations : Pass<"aie-substitute-shim-dma-allocations" ]; } +def AIECtrlPacketToDma : Pass<"aie-ctrl-packet-to-dma", "AIE::DeviceOp"> { + let summary = "Lowers npu.control_packet op to npu.dma_memcpy_nd op"; + + let constructor = "xilinx::AIEX::createAIECtrlPacketToDmaPass()"; + let dependentDialects = [ + "xilinx::AIE::AIEDialect", + "xilinx::AIEX::AIEXDialect", + ]; +} + +def AIECtrlPacketInferTiles : Pass<"aie-ctrl-packet-infer-tiles", "AIE::DeviceOp"> { + let summary = "Infer aie.tile ops from aiex.control_packet addresses"; + + let constructor = "xilinx::AIEX::createAIECtrlPacketInferTilesPass()"; + let dependentDialects = [ + "xilinx::AIE::AIEDialect", + "xilinx::AIEX::AIEXDialect", + ]; +} + #endif diff --git a/lib/Dialect/AIEX/IR/AIEXDialect.cpp b/lib/Dialect/AIEX/IR/AIEXDialect.cpp index 765a2a12ab..55351a09ae 100644 --- a/lib/Dialect/AIEX/IR/AIEXDialect.cpp +++ b/lib/Dialect/AIEX/IR/AIEXDialect.cpp @@ -642,3 +642,23 @@ LogicalResult AIEX::DMAStartBdChainOp::verify() { } return success(); } + +//===----------------------------------------------------------------------===// +// NpuControlPacketOp +//===----------------------------------------------------------------------===// + +uint32_t AIEX::NpuControlPacketOp::getRowFromAddr() { + const auto &targetModel = AIE::getTargetModel(*this); + uint32_t addr = getAddress(); + uint32_t rowInt = + (addr & (0xff << targetModel.getRowShift())) >> targetModel.getRowShift(); + return rowInt; +} + +uint32_t AIEX::NpuControlPacketOp::getColumnFromAddr() { + const auto &targetModel = AIE::getTargetModel(*this); + uint32_t addr = getAddress(); + uint32_t colInt = (addr & (0xff << targetModel.getColumnShift())) >> + targetModel.getColumnShift(); + return colInt; +} diff --git a/lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp b/lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp new file mode 100644 index 0000000000..0b76309a60 --- /dev/null +++ b/lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp @@ -0,0 +1,157 @@ +//===- AIECtrlPacketToDma.cpp -----------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 Advanced Micro Devices Inc. +// +//===----------------------------------------------------------------------===// + +#include "aie/Dialect/AIE/IR/AIEDialect.h" +#include "aie/Dialect/AIE/Transforms/AIEGenerateColumnControlOverlay.h" +#include "aie/Dialect/AIEX/IR/AIEXDialect.h" +#include "aie/Dialect/AIEX/Transforms/AIEXPasses.h" + +#include "mlir/IR/Attributes.h" +#include "mlir/Pass/Pass.h" + +#include "llvm/ADT/TypeSwitch.h" + +#define DEBUG_TYPE "aie-ctrl-packet-to-dma" + +using namespace mlir; +using namespace xilinx; +using namespace xilinx::AIE; +using namespace xilinx::AIEX; + +struct AIECtrlPacketInferTilesPass + : AIECtrlPacketInferTilesBase { + void runOnOperation() override { + DeviceOp device = getOperation(); + const auto &targetModel = device.getTargetModel(); + OpBuilder devBuilder = OpBuilder::atBlockBegin(device.getBody()); + + auto sequenceOps = device.getOps(); + for (auto f : sequenceOps) { + auto ctrlPktOps = f.getOps(); + for (auto ctrlPktOp : ctrlPktOps) { + auto tOp = TileOp::getOrCreate(devBuilder, device, + (int)ctrlPktOp.getColumnFromAddr(), + (int)ctrlPktOp.getRowFromAddr()); + // Assign controller id + auto tileIDMap = getTileToControllerIdMap(true, targetModel); + if (tOp->hasAttr("controller_id")) + continue; + auto pktInfoAttr = AIE::PacketInfoAttr::get( + tOp->getContext(), /*pkt_type*/ 0, + /*pkt_id*/ tileIDMap[{tOp.colIndex(), tOp.rowIndex()}]); + tOp->setAttr("controller_id", pktInfoAttr); + } + } + } +}; + +struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase { + void runOnOperation() override { + DeviceOp device = getOperation(); + const auto &targetModel = device.getTargetModel(); + auto ctx = device->getContext(); + auto loc = device->getLoc(); + OpBuilder devBuilder = OpBuilder::atBlockBegin(device.getBody()); + + if (targetModel.getTargetArch() == AIEArch::AIE1) + return; // Disable this pass for AIE1; AIE1 support NYI. + + SmallVector erased; + auto sequenceOps = device.getOps(); + for (auto f : sequenceOps) { + + auto controlPacketOps = f.getOps(); + if (controlPacketOps.empty()) + continue; + + OpBuilder builder(f); + + auto newSeq = + builder.create(loc, f.getSymNameAttr()); + newSeq.getBody().push_back(new Block); + auto ctrlPktMemrefType = MemRefType::get( + SmallVector{1024}, IntegerType::get(ctx, 32), nullptr, 0); + auto newBlockArg = newSeq.getBody().addArgument(ctrlPktMemrefType, loc); + builder.setInsertionPointToStart(&newSeq.getBody().front()); + + int ddrOffset = 0; + Block &entry = f.getBody().front(); + for (auto &o : entry) { + llvm::TypeSwitch(&o).Case( + [&](auto op) { + // Destination tile info + int col = op.getColumnFromAddr(); + int row = op.getRowFromAddr(); + AIE::TileOp destTileOp = + TileOp::getOrCreate(devBuilder, device, col, row); + assert(destTileOp->hasAttr("controller_id")); + auto controllerIdPkt = + destTileOp->getAttrOfType( + "controller_id"); + + // Control packet offset (to raw data at ddr) and size + uint32_t ctrlPktSize = 0; + auto data = op.getData(); + auto length = op.getLength(); + if (data) + ctrlPktSize = data->size(); + if (!data && length) + ctrlPktSize = *length; + ctrlPktSize++; // Ctrl info word + + const std::vector staticOffsets = {0, 0, 0, ddrOffset}; + ddrOffset += ctrlPktSize; + const std::vector staticSizes = {1, 1, 1, + (int64_t)ctrlPktSize}; + const std::vector staticStrides = {0, 0, 0, 1}; + + // Shim dma alloc symbol name + std::string shimDmaAllocName = "ctrlpkt"; + shimDmaAllocName += "_col" + std::to_string(col); + shimDmaAllocName += "_mm2s"; + auto rowToShimChanMap = + getRowToShimChanMap(targetModel, WireBundle::DMA); + int shimChan = rowToShimChanMap[destTileOp.rowIndex()]; + shimDmaAllocName += "_chan" + std::to_string(shimChan); + + StringRef metadata = builder.getStringAttr(shimDmaAllocName); + builder.create( + builder.getUnknownLoc(), 0, 0, newBlockArg, + SmallVector{}, SmallVector{}, + SmallVector{}, ArrayRef(staticOffsets), + ArrayRef(staticSizes), ArrayRef(staticStrides), + controllerIdPkt, metadata, 0, true); + + auto shimRow = builder.getI32IntegerAttr(0); + auto shimCol = builder.getI32IntegerAttr(col); + auto dir = builder.getI32IntegerAttr(1); // MM2S + auto chan = builder.getI32IntegerAttr(shimChan); + auto col_num = builder.getI32IntegerAttr(1); + auto row_num = builder.getI32IntegerAttr(1); + builder.create(loc, shimCol, shimRow, dir, chan, + col_num, row_num); + }); + } + + erased.push_back(f); + } + + for (auto e : erased) + e->erase(); + } +}; + +std::unique_ptr> +AIEX::createAIECtrlPacketInferTilesPass() { + return std::make_unique(); +} +std::unique_ptr> AIEX::createAIECtrlPacketToDmaPass() { + return std::make_unique(); +} diff --git a/lib/Dialect/AIEX/Transforms/CMakeLists.txt b/lib/Dialect/AIEX/Transforms/CMakeLists.txt index ca588ce769..3caa148869 100644 --- a/lib/Dialect/AIEX/Transforms/CMakeLists.txt +++ b/lib/Dialect/AIEX/Transforms/CMakeLists.txt @@ -18,6 +18,7 @@ add_mlir_dialect_library(AIEXTransforms AIEAssignRuntimeSequenceBDIDs.cpp AIEDMATasksToNPU.cpp AIESubstituteShimDMAAllocations.cpp + AIECtrlPacketToDma.cpp ADDITIONAL_HEADER_DIRS ${AIE_BINARY_DIR}/include diff --git a/test/dialect/AIEX/ctrl_pkt_infer_tile_ops.mlir b/test/dialect/AIEX/ctrl_pkt_infer_tile_ops.mlir new file mode 100644 index 0000000000..81275c2eab --- /dev/null +++ b/test/dialect/AIEX/ctrl_pkt_infer_tile_ops.mlir @@ -0,0 +1,33 @@ +//===- ctrl_pkt_infer_tile_ops.mlir ----------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt %s -aie-ctrl-packet-infer-tiles --split-input-file | FileCheck %s + +// infer aie.tile ops based on control packet op's address + +// CHECK-LABEL: aie.device(npu1_1col) { +// CHECK: aie.tile(0, 0) {controller_id = #aie.packet_info} + +aie.device(npu1_1col) { + aiex.runtime_sequence(%arg0: memref<2048xi32>) { + aiex.control_packet {address = 126976 : ui32, data = array, opcode = 0 : i32, stream_id = 0 : i32} + } +} + +// ----- + +// CHECK-LABEL: aie.device(npu1_1col) { +// CHECK: aie.tile(0, 2) {controller_id = #aie.packet_info} + +aie.device(npu1_1col) { + aiex.runtime_sequence(%arg0: memref<2048xi32>) { + aiex.control_packet {address = 2301952 : ui32, data = array, opcode = 0 : i32, stream_id = 0 : i32} + } +} diff --git a/test/dialect/AIEX/ctrl_pkt_to_dma.mlir b/test/dialect/AIEX/ctrl_pkt_to_dma.mlir new file mode 100644 index 0000000000..0ee76178c7 --- /dev/null +++ b/test/dialect/AIEX/ctrl_pkt_to_dma.mlir @@ -0,0 +1,44 @@ +//===- ctrl_pkt_to_dma.mlir ------------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt %s -aie-ctrl-packet-to-dma --split-input-file | FileCheck %s + +// transforms control packet ops to dma memcpy ops and sync ops. + +// CHECK-LABEL: aie.device(npu1_1col) { +// CHECK: aiex.runtime_sequence(%[[ARG0:.*]]: memref<1024xi32>) { +// CHECK: aiex.npu.dma_memcpy_nd(0, 0, %[[ARG0]][0, 0, 0, 0][1, 1, 1, 2][0, 0, 0, 1], packet = ) {id = 0 : i64, issue_token = true, metadata = @ctrlpkt_col0_mm2s_chan0} : memref<1024xi32> +// CHECK: aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 1 : i32, row = 0 : i32, row_num = 1 : i32} + +aie.device(npu1_1col) { + %tile_0_0 = aie.tile(0, 0) {controller_id = #aie.packet_info} + aiex.runtime_sequence(%arg0: memref<2048xi32>) { + aiex.control_packet {address = 126976 : ui32, data = array, opcode = 0 : i32, stream_id = 0 : i32} + } + aie.shim_dma_allocation @ctrlpkt_col0_mm2s_chan0(MM2S, 0, 0) + memref.global "public" @ctrlpkt_col0_mm2s_chan0 : memref<2048xi32> +} + +// ----- + +// CHECK-LABEL: aie.device(npu1_1col) { +// CHECK: aiex.runtime_sequence(%[[ARG0:.*]]: memref<1024xi32>) { +// CHECK: aiex.npu.dma_memcpy_nd(0, 0, %[[ARG0]][0, 0, 0, 0][1, 1, 1, 2][0, 0, 0, 1], packet = ) {id = 0 : i64, issue_token = true, metadata = @ctrlpkt_col0_mm2s_chan0} : memref<1024xi32> +// CHECK: aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 1 : i32, row = 0 : i32, row_num = 1 : i32} + +aie.device(npu1_1col) { + %tile_0_0 = aie.tile(0, 0) + %tile_0_2 = aie.tile(0, 2) {controller_id = #aie.packet_info} + aiex.runtime_sequence(%arg0: memref<2048xi32>) { + aiex.control_packet {address = 2301952 : ui32, data = array, opcode = 0 : i32, stream_id = 0 : i32} + } + aie.shim_dma_allocation @ctrlpkt_col0_mm2s_chan0(MM2S, 0, 0) + memref.global "public" @ctrlpkt_col0_mm2s_chan0 : memref<2048xi32> +}