diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td index 271b596b3..11bda3868 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td @@ -1003,42 +1003,6 @@ def AMDAIE_LogicalObjectFifoFromMemrefOp let cppNamespace = "mlir::iree_compiler::AMDAIE"; } -def AMDAIE_LogicalObjectFifoLink - : AMDAIE_Op<"logicalobjectfifo.link", [AttrSizedOperandSegments]> { - let summary = "Link DMA operations operating on a logical objectFifo."; - let description = [{ - The `amdaie.logicalobjectfifo.link` operation explicitly links the DMA - operations operating on a logical objectFifo. This operation mimics the - `aie.objectFifo.link` operation and is useful as an intermediary before - conversion to the AIE dialect. - - Example: - ```mlir - %1 = amdaie.logicalobjectfifo.from_memref %alloc, {%tile} - : memref<64x32xi32, 1> -> !amdaie.logicalobjectfifo> - %3 = amdaie.circular_dma_cpy_nd(%1[] [] [], %0[] [] []) - : (!amdaie.logicalobjectfifo>, - !amdaie.logicalobjectfifo>) - %4 = amdaie.circular_dma_cpy_nd(%2[] [] [], - %1[%c0, %c0, %c0] [%c8, %c32, %c8] [%c8, %c64, %c1]) - : (!amdaie.logicalobjectfifo>, - !amdaie.logicalobjectfifo>) - amdaie.logicalobjectfifo.link[%3] -> [%4] () - ``` - Here, two circular DMA operations operating on logical objectFifo `%1` are - linked explicitly through the `amdaie.logicalobjectfifo.link` operation. - }]; - - let arguments = ( - ins Variadic:$ins, - Variadic:$outs - ); - - let assemblyFormat = [{ - `[` ($ins^)? `]` `->` `[` ($outs^)? `]` `(` `)` attr-dict - }]; -} - def AMDAIE_LogicalObjectFifoPlaceholderOp: AMDAIE_Op<"logicalobjectfifo.placeholder", [ LogicalObjFifoOpInterface, Pure]> { diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/test/roundtrip.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/test/roundtrip.mlir index 39dddbc9d..47054c91f 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/test/roundtrip.mlir +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/test/roundtrip.mlir @@ -207,21 +207,6 @@ func.func @logicalobjectfifo_from_buffers() { // ----- -// CHECK-LABEL: func.func @logicalobjectfifo_link -// CHECK: %[[DMA0:.+]] = amdaie.circular_dma_cpy_nd -// CHECK: %[[DMA1:.+]] = amdaie.circular_dma_cpy_nd -// CHECK: amdaie.logicalobjectfifo.link -// CHECK-SAME: %[[DMA0]] -// CHECK-SAME: %[[DMA1]] -func.func @logicalobjectfifo_link(%arg0: !amdaie.logicalobjectfifo>, %arg1: !amdaie.logicalobjectfifo>, %arg2: !amdaie.logicalobjectfifo>) { - %0 = amdaie.circular_dma_cpy_nd(%arg1[] [] [], %arg0[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %1 = amdaie.circular_dma_cpy_nd(%arg2[] [] [], %arg1[] [] []) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - amdaie.logicalobjectfifo.link[%0] -> [%1] () - return -} - -// ----- - // CHECK-LABEL: func.func @logicalobjectfifo_placeholder // CHECK: %[[C0:.+]] = arith.constant 0 : index // CHECK: %[[tile_0_0:.+]] = amdaie.tile(%[[C0]], %[[C0]]) diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECreateLogicalObjectFifoLink.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECreateLogicalObjectFifoLink.cpp deleted file mode 100644 index 928c2c19f..000000000 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECreateLogicalObjectFifoLink.cpp +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright 2024 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "iree-amd-aie/IR/AMDAIEOps.h" -#include "iree-amd-aie/Transforms/AMDAIEDmaUtils.h" -#include "iree-amd-aie/Transforms/Passes.h" -#include "iree-amd-aie/Transforms/Transforms.h" -#include "mlir/Support/LogicalResult.h" - -#define DEBUG_TYPE "iree-amdaie-create-logical-objectfifo-link" - -namespace mlir::iree_compiler::AMDAIE { - -/// Utility to verify that there are no overlapping access patterns between -/// different strided operations as this is not supported in the MLIR-AIE -/// backend for now. -/// -/// Example: -/// -/// There are two strided DMA operations on the same logical objectfifo with -/// following access patterns: -/// -/// Access pattern 1: (offsets: [0, 0], sizes: [2, 8], strides: [32, 1]) -/// Access pattern 2: (offsets: [1, 0, 0], sizes: [1, 2, 8], strides: [32, 32, -/// 1]) -/// -/// In this case, access pattern 1 access elements in range [0, 63], while -/// access pattern 2 access elements in range [32, 95]. Therefore, -/// these two access patterns overlap by both accessing elements in range [32, -/// 63]. -template -LogicalResult checkForContiguousAccessPatterns( - SmallVector> &connectionOps) { - for (auto &&[i, connectionOpAndOffset] : llvm::enumerate(connectionOps)) { - FailureOr npuDmaUserOp = - connectionOpAndOffset.first.getNpuCircularDmaCpyNdUser(); - if (failed(npuDmaUserOp)) return failure(); - auto stridedOp = cast( - npuDmaUserOp.value().getOperation()); - std::optional extent; - if constexpr (OperateOn == CopyOpOperateOn::Source) { - extent = stridedOp.getSourceStaticExtent(); - } else { - extent = stridedOp.getTargetStaticExtent(); - } - if (!extent) { - return stridedOp.emitOpError() - << "has a non-constant access extent, which is not supported"; - } - int64_t offset = connectionOpAndOffset.second; - if (i < (connectionOps.size() - 1)) { - if (offset + extent.value() != connectionOps[i + 1].second) { - // TODO(newling) my understanding from the code is that the link - // operation effectively replaces the cumulative offset of each - // circular_dma_cpy_nd with the differential offset with - // the previous circular_dma_cpy_nd in the 'link' list. - // - // This however is hardcoded to a zero offset (later in the pass where - // discardAllNonZeroOffsets is called, offsets are set to zero). This - // effectively is constraining the link operation to only work with - // contiguous access patterns. - // - // Is this a bug? - return stridedOp.emitOpError() - << "has access pattern of which isn't contiguous with next one " - "-- not currently supported."; - } - } - } - return success(); -} - -/// Utility to add explicit link operations to avoid having to do this during -/// conversion to AIEDialect operations. This function only consider L2/MT for -/// links as L1/L3 don't need this linking through AIE objectFifos. Furthermore, -/// it assumes that all users of a logical objectFifo reside within the same -/// block and an error will be emitted if that's not the case. -LogicalResult createLogicalObjectFifoLink( - RewriterBase &rewriter, - AMDAIE::LogicalObjectFifoFromMemrefOp logicalObjectFifo, - SmallVector &newLinkOps) { - Attribute memSpace = logicalObjectFifo.getMemorySpace(); - if (!memSpace || dyn_cast(memSpace).getInt() != 1) { - return success(); - } - - // Visit all DoublyStridedOpInterface users of this logical objectFifo and - // add them to either the input or output side of this logical objectFifo - // together with the base offset to be used later for sorting. While doing - // this, keep track of the last user operation for insertion purposes. - SmallVector> ins; - SmallVector> outs; - AMDAIE::ConnectionOp lastUserOp; - for (Operation *userOp : logicalObjectFifo->getUsers()) { - auto connectionOp = dyn_cast(userOp); - if (!connectionOp) { - return logicalObjectFifo.emitOpError() - << "found user which is not an `amdaie.connection` op"; - } - FailureOr npuDmaUserOp = - connectionOp.getNpuCircularDmaCpyNdUser(); - if (failed(npuDmaUserOp)) return failure(); - auto stridedOp = cast( - npuDmaUserOp.value().getOperation()); - - if (lastUserOp && connectionOp->getBlock() != lastUserOp->getBlock()) { - logicalObjectFifo->emitError( - "has copy-like users not residing in the same block"); - return failure(); - } - if (!lastUserOp || lastUserOp->isBeforeInBlock(connectionOp)) { - lastUserOp = connectionOp; - } - - auto sourceLogicalObjectFifo = - dyn_cast_if_present( - connectionOp.getSource().getDefiningOp()); - // The `sourceLogicalObjectFifo` could be either a - // `LogicalObjectFifoFromMemrefOp` or `LogicalObjectFifoPlaceholderOp`, - // but currently the linking only works with - // `LogicalObjectFifoFromMemrefOp` on L2. - if (sourceLogicalObjectFifo && - logicalObjectFifo == sourceLogicalObjectFifo) { - if (std::optional offset = - stridedOp.getSourceStaticBaseOffset()) { - outs.push_back(std::make_pair(connectionOp, offset.value())); - } else { - return stridedOp.emitOpError() - << "non-constant offset found which is not supported"; - } - } else { - if (std::optional offset = - stridedOp.getTargetStaticBaseOffset()) { - ins.push_back(std::make_pair(connectionOp, offset.value())); - } else { - return stridedOp.emitOpError() - << "non-constant offset found which is not supported"; - } - } - } - - // Sort the inputs and outputs on offset as the link operation uses this order - // to generate correct data buffer sizes. - auto comparator = [](std::pair a, - std::pair b) -> bool { - return a.second < b.second; - }; - - llvm::sort(ins.begin(), ins.end(), comparator); - llvm::sort(outs.begin(), outs.end(), comparator); - - // Check that access patterns are not overlapping between consumers - // respectively producers. - if (failed(checkForContiguousAccessPatterns(ins))) { - return failure(); - } - if (failed(checkForContiguousAccessPatterns(outs))) { - return failure(); - } - - SmallVector inResults = llvm::map_to_vector<8>( - ins, [](std::pair elem) -> Value { - return cast(elem.first->getResult(0)); - }); - SmallVector outResults = llvm::map_to_vector( - outs, [](std::pair elem) -> Value { - return cast(elem.first->getResult(0)); - }); - - // Insert the `LogicalObjectFifoLink` after the last user operation. - if (lastUserOp) { - rewriter.setInsertionPointAfter(lastUserOp); - auto linkOp = rewriter.create( - rewriter.getUnknownLoc(), inResults, outResults); - newLinkOps.push_back(linkOp); - } - return success(); -} - -LogicalResult discardLinkNonZeroOffsets(RewriterBase &rewriter, - AMDAIE::LogicalObjectFifoLink linkOp) { - for (Value input : linkOp.getIns()) { - auto connectionOp = - dyn_cast_if_present(input.getDefiningOp()); - if (!connectionOp) { - return linkOp.emitOpError() - << "found input which is not an `amdaie.connection` op"; - } - FailureOr npuDmaUserOp = - connectionOp.getNpuCircularDmaCpyNdUser(); - if (failed(npuDmaUserOp)) return failure(); - auto stridedOp = cast( - npuDmaUserOp.value().getOperation()); - SmallVector shape; - (void)discardAllNonZeroOffsets(rewriter, stridedOp, - shape); - } - for (Value output : linkOp.getOuts()) { - auto connectionOp = - dyn_cast_if_present(output.getDefiningOp()); - if (!connectionOp) { - return linkOp.emitOpError() - << "found input which is not an `amdaie.connection` op"; - } - FailureOr npuDmaUserOp = - connectionOp.getNpuCircularDmaCpyNdUser(); - if (failed(npuDmaUserOp)) return failure(); - auto stridedOp = cast( - npuDmaUserOp.value().getOperation()); - SmallVector shape; - (void)discardAllNonZeroOffsets(rewriter, stridedOp, - shape); - } - return success(); -} - -namespace { - -struct AMDAIECreateLogicalObjectFifoLinkPass - : public impl::AMDAIECreateLogicalObjectFifoLinkBase< - AMDAIECreateLogicalObjectFifoLinkPass> { - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - - void runOnOperation() override { - Operation *parentOp = getOperation(); - IRRewriter rewriter(parentOp->getContext()); - - SmallVector newLinkOps; - WalkResult res = parentOp->walk( - [&](AMDAIE::LogicalObjectFifoFromMemrefOp logicalObjectFifo) { - if (failed(createLogicalObjectFifoLink(rewriter, logicalObjectFifo, - newLinkOps))) { - logicalObjectFifo.emitError() << "couldn't create a link operation"; - return WalkResult::interrupt(); - } - return WalkResult::advance(); - }); - if (res.wasInterrupted()) return signalPassFailure(); - - // Remove all non-zero offsets. - for (AMDAIE::LogicalObjectFifoLink linkOp : newLinkOps) { - if (failed(discardLinkNonZeroOffsets(rewriter, linkOp))) - return signalPassFailure(); - } - } -}; - -} // namespace - -std::unique_ptr createAMDAIECreateLogicalObjectFifoLinkPass() { - return std::make_unique(); -} - -} // namespace mlir::iree_compiler::AMDAIE diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt index 7fdad4b25..c8c288835 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt @@ -57,7 +57,6 @@ iree_cc_library( "AMDAIEControlCodeLoopUnroll.cpp" "AMDAIEConvertCoreForallToFor.cpp" "AMDAIECreateAIEWorkgroup.cpp" - "AMDAIECreateLogicalObjectFifoLink.cpp" "AMDAIECreateReferenceToAllocation.cpp" "AMDAIEDistributeCoresAndObjectFifos.cpp" "AMDAIEDmaCSE.cpp" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index e2797cb6a..3579daa1e 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -626,7 +626,6 @@ void addAMDAIEObjectFifoLoweringPasses(OpPassManager &passManager) { passManager.addPass(createCanonicalizerPass()); passManager.addPass(createAMDAIEDmaCSEPass()); - // passManager.addPass(createAMDAIECreateLogicalObjectFifoLinkPass()); passManager.addPass(createAMDAIECanonicalizeDoublyStridedOpPass()); passManager.addPass(createCanonicalizerPass()); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h index 8039fe5a2..637e1cf83 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h @@ -93,10 +93,6 @@ std::unique_ptr createAMDAIEConvertCoreForallToForPass(); /// Pass to create a single AIE workgroup. std::unique_ptr createAMDAIECreateAIEWorkgroupPass(); -/// Pass to create logical objectFifo link operations, explicitly linking inputs -/// and outputs. -std::unique_ptr createAMDAIECreateLogicalObjectFifoLinkPass(); - /// Pass to create references to allocations in L1 memory space. std::unique_ptr createAMDAIECreateReferenceToAllocationPass(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td index f1d7f54d4..941857577 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td @@ -140,12 +140,6 @@ def AMDAIECreateAIEWorkgroup : let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIECreateAIEWorkgroupPass()"; } -def AMDAIECreateLogicalObjectFifoLink : - Pass<"iree-amdaie-create-logical-objectfifo-link", ""> { - let summary = "Create logical objectFifo link operations, explicitly linking inputs and outputs."; - let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIECreateLogicalObjectFifoLinkPass()"; -} - def AMDAIECreateReferenceToAllocation : InterfacePass<"iree-amdaie-create-reference-to-allocation", "mlir::FunctionOpInterface"> { let summary = "Create references to allocations in L1 memory space."; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt index 8a86c6e82..ebac90ebf 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt @@ -22,7 +22,6 @@ iree_lit_test_suite( "controlcode_loop_unrolling.mlir" "convert_core_forall_to_for.mlir" "create_aie_workgroup.mlir" - "create_logical_objectfifo_link.mlir" "create_reference_to_allocation.mlir" "disable_vectorization.mlir" "distribute_cores_and_objectfifos.mlir" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir deleted file mode 100644 index 114b94765..000000000 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/create_logical_objectfifo_link.mlir +++ /dev/null @@ -1,288 +0,0 @@ -// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-amdaie-create-logical-objectfifo-link,cse,canonicalize))" --verify-diagnostics %s | FileCheck %s - -// CHECK-LABEL: func.func @link -// CHECK: %[[CONNECTION0:.+]] = amdaie.connection -// CHECK: %[[CONNECTION1:.+]] = amdaie.connection -// CHECK: amdaie.logicalobjectfifo.link -// CHECK-SAME: %[[CONNECTION0]] -// CHECK-SAME: %[[CONNECTION1]] -func.func @link(%arg0: memref<32x1024xi32>, %arg1: memref<32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %3 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %4 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %5 = amdaie.npu.circular_dma_cpy_nd %3([] [] [], [] [] []) - %6 = amdaie.npu.circular_dma_cpy_nd %4([] [] [], [] [] []) - return -} - -// ----- - -// CHECK-LABEL: func.func @link_multiple_inputs -// CHECK: %[[CONNECTION0:.+]] = amdaie.connection -// CHECK: %[[CONNECTION1:.+]] = amdaie.connection -// CHECK: %[[CONNECTION2:.+]] = amdaie.connection -// CHECK: amdaie.logicalobjectfifo.link -// CHECK-DAG: %[[CONNECTION0]] -// CHECK-DAG: %[[CONNECTION1]] -// CHECK-DAG: %[[CONNECTION2]] -func.func @link_multiple_inputs(%arg0: memref<32x1024xi32>, %arg1: memref<32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %tile_0 = amdaie.tile(%c1, %c1) - %tile_1 = amdaie.tile(%c1, %c2) - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_1} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %3 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %4 = amdaie.connection(%2, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %5 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.connection(%3, %2) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %7 = amdaie.npu.circular_dma_cpy_nd %4([] [] [], [] [] []) - %8 = amdaie.npu.circular_dma_cpy_nd %5([] [] [], [] [] []) - %9 = amdaie.npu.circular_dma_cpy_nd %6([] [] [], [] [] []) - return -} - -// ----- - -// Check correct link op generation for multiple producers with offsets and -// ensure correct order of the DMAs in the link operation's input list based on -// the base offset (CONNECTION2 should be ordered before CONNECTION1). -// CHECK-LABEL: func.func @link_multiple_inputs_with_offsets -// CHECK: %[[CONNECTION0:.+]] = amdaie.connection -// CHECK: %[[CONNECTION1:.+]] = amdaie.connection -// CHECK: %[[CONNECTION2:.+]] = amdaie.connection -// CHECK: %[[CONNECTION3:.+]] = amdaie.connection -// CHECK: amdaie.logicalobjectfifo.link[%[[CONNECTION0]], %[[CONNECTION2]], %[[CONNECTION1]]] -> [%[[CONNECTION3]]] () -func.func @link_multiple_inputs_with_offsets(%arg0: memref<32x1024xi32>, %arg1: memref<3x32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %c3 = arith.constant 3 : index - %tile_0 = amdaie.tile(%c1, %c1) - %tile_1 = amdaie.tile(%c1, %c2) - %tile_2 = amdaie.tile(%c1, %c3) - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_1} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_2} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %3 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<3x32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %4 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %5 = amdaie.connection(%3, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.connection(%3, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %7 = amdaie.connection(%3, %2) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %8 = amdaie.connection(%4, %3) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %9 = amdaie.npu.circular_dma_cpy_nd %5([0] [1024] [1], [] [] []) - %10 = amdaie.npu.circular_dma_cpy_nd %6([1, 0] [1, 1024] [2048, 1], [] [] []) - %11 = amdaie.npu.circular_dma_cpy_nd %7([1, 0] [1, 1024] [1024, 1], [] [] []) - %12 = amdaie.npu.circular_dma_cpy_nd %8([] [] [], [] [] []) - return -} - -// ----- - -func.func @link_multiple_inputs_with_overlapping_access(%arg0: memref<32x1024xi32>, %arg1: memref<3x32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - // expected-error @+1 {{couldn't create a link operation}} - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<3x32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %3 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %4 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %5 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %7 = amdaie.npu.circular_dma_cpy_nd %3([0] [1024] [1], [] [] []) - %8 = amdaie.npu.circular_dma_cpy_nd %4([1, 0] [1, 1024] [2048, 1], [] [] []) - // expected-error @+1 {{op has access pattern of which isn't contiguous with next one}} - %9 = amdaie.npu.circular_dma_cpy_nd %5([1, 0] [1, 1025] [1024, 1], [] [] []) - %10 = amdaie.npu.circular_dma_cpy_nd %6([] [] [], [] [] []) - return -} - -// ----- - -// CHECK-LABEL: func.func @link_multiple_outputs -// CHECK: %[[CONNECTION0:.+]] = amdaie.connection -// CHECK: %[[CONNECTION1:.+]] = amdaie.connection -// CHECK: %[[CONNECTION2:.+]] = amdaie.connection -// CHECK: amdaie.logicalobjectfifo.link -// CHECK-DAG: %[[CONNECTION0]] -// CHECK-DAG: %[[CONNECTION1]] -// CHECK-DAG: %[[CONNECTION2]] -func.func @link_multiple_outputs(%arg0: memref<32x1024xi32>, %arg1: memref<32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %tile_0 = amdaie.tile(%c1, %c1) - %tile_1 = amdaie.tile(%c1, %c2) - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_0} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %3 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_1} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %4 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %5 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.connection(%3, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %7 = amdaie.npu.circular_dma_cpy_nd %4([] [] [], [] [] []) - %8 = amdaie.npu.circular_dma_cpy_nd %5([] [] [], [] [] []) - %9 = amdaie.npu.circular_dma_cpy_nd %6([] [] [], [] [] []) - return -} - -// ----- - -// Check correct link op generation for multiple consumers with offsets and -// ensure correct order of the DMAs in the link operation's output list based on -// the base offset (CONNECTION3 should be ordered before CONNECTION2, which should be ordered -// before CONNECTION1). -// CHECK-LABEL: func.func @link_multiple_outputs_with_offsets -// CHECK: %[[CONNECTION0:.+]] = amdaie.connection -// CHECK: %[[CONNECTION1:.+]] = amdaie.connection -// CHECK: %[[CONNECTION2:.+]] = amdaie.connection -// CHECK: %[[CONNECTION3:.+]] = amdaie.connection -// CHECK: amdaie.logicalobjectfifo.link[%[[CONNECTION0]]] -> [%[[CONNECTION3]], %[[CONNECTION2]], %[[CONNECTION1]]] () -func.func @link_multiple_outputs_with_offsets(%arg0: memref<32x1024xi32>, %arg1: memref<3x32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %c3 = arith.constant 3 : index - %tile_0 = amdaie.tile(%c1, %c1) - %tile_1 = amdaie.tile(%c1, %c2) - %tile_2 = amdaie.tile(%c1, %c3) - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<3x32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_0} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %3 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_1} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %4 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_2} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %5 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %7 = amdaie.connection(%3, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %8 = amdaie.connection(%4, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %9 = amdaie.npu.circular_dma_cpy_nd %5([] [] [], [] [] []) - %10 = amdaie.npu.circular_dma_cpy_nd %6([] [] [], [1, 0] [1, 1024] [2048, 1]) - %11 = amdaie.npu.circular_dma_cpy_nd %7([] [] [], [1, 0] [1, 1024] [1024, 1]) - %12 = amdaie.npu.circular_dma_cpy_nd %8([] [] [], [0] [1024] [1]) - return -} - -// ----- - -func.func @link_multiple_outputs_with_overlapping_access(%arg0: memref<32x1024xi32>, %arg1: memref<3x32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - // expected-error @+1 {{couldn't create a link operation}} - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<3x32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %3 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %4 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %5 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %7 = amdaie.npu.circular_dma_cpy_nd %3([] [] [], [] [] []) - %8 = amdaie.npu.circular_dma_cpy_nd %4([] [] [], [1, 0] [1, 1024] [2048, 1]) - %9 = amdaie.npu.circular_dma_cpy_nd %5([] [] [], [1, 0] [1, 1024] [1024, 1]) - // expected-error @+1 {{op has access pattern of which isn't contiguous with next one}} - %10 = amdaie.npu.circular_dma_cpy_nd %6([] [] [], [0, 0] [32, 32] [64, 1]) - return -} - -// ----- - -// CHECK-LABEL: func.func @link_multiple_inputs_and_outputs -// CHECK: %[[CONNECTION0:.+]] = amdaie.connection -// CHECK: %[[CONNECTION1:.+]] = amdaie.connection -// CHECK: %[[CONNECTION2:.+]] = amdaie.connection -// CHECK: %[[CONNECTION3:.+]] = amdaie.connection -// CHECK: amdaie.logicalobjectfifo.link -// CHECK-DAG: %[[CONNECTION0]] -// CHECK-DAG: %[[CONNECTION1]] -// CHECK-DAG: %[[CONNECTION2]] -// CHECK-DAG: %[[CONNECTION3]] -func.func @link_multiple_inputs_and_outputs(%arg0: memref<32x1024xi32>, %arg1: memref<2x32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %c3 = arith.constant 3 : index - %c4 = arith.constant 4 : index - %tile_0 = amdaie.tile(%c1, %c1) - %tile_1 = amdaie.tile(%c1, %c2) - %tile_2 = amdaie.tile(%c1, %c3) - %tile_3 = amdaie.tile(%c1, %c4) - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<2x32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_1} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %3 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_2} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %4 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_3} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %5 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.connection(%1, %2) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %7 = amdaie.connection(%3, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %8 = amdaie.connection(%4, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %9 = amdaie.npu.circular_dma_cpy_nd %5([] [] [], [] [] []) - %10 = amdaie.npu.circular_dma_cpy_nd %6([] [] [], [] [] []) - %11 = amdaie.npu.circular_dma_cpy_nd %7([] [] [], [] [] []) - %12 = amdaie.npu.circular_dma_cpy_nd %8([] [] [], [] [] []) - return -} - -// ----- - -// CHECK-LABEL: func.func @link_multiple_inputs_and_outputs_with_offsets -// CHECK: %[[CONNECTION0:.+]] = amdaie.connection -// CHECK: %[[CONNECTION1:.+]] = amdaie.connection -// CHECK: %[[CONNECTION2:.+]] = amdaie.connection -// CHECK: %[[CONNECTION3:.+]] = amdaie.connection -// CHECK: amdaie.logicalobjectfifo.link[%[[CONNECTION0]], %[[CONNECTION1]]] -> [%[[CONNECTION3]], %[[CONNECTION2]]] () -func.func @link_multiple_inputs_and_outputs_with_offsets(%arg0: memref<32x1024xi32>, %arg1: memref<2x32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %c1 = arith.constant 1 : index - %c2 = arith.constant 2 : index - %c3 = arith.constant 3 : index - %c4 = arith.constant 4 : index - %tile_0 = amdaie.tile(%c1, %c1) - %tile_1 = amdaie.tile(%c1, %c2) - %tile_2 = amdaie.tile(%c1, %c3) - %tile_3 = amdaie.tile(%c1, %c4) - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_0} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<2x32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg0, {%tile_1} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %3 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_2} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %4 = amdaie.logicalobjectfifo.from_memref %arg2, {%tile_3} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %5 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.connection(%1, %2) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %7 = amdaie.connection(%3, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %8 = amdaie.connection(%4, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %9 = amdaie.npu.circular_dma_cpy_nd %5([0] [1024] [1], [] [] []) - %10 = amdaie.npu.circular_dma_cpy_nd %6([1, 0] [1, 1024] [1024, 1], [] [] []) - %11 = amdaie.npu.circular_dma_cpy_nd %7([] [] [], [1, 0] [1, 1024] [1024, 1]) - %12 = amdaie.npu.circular_dma_cpy_nd %8([] [] [], [0] [1024] [1]) - return -} - -// ----- - -// Make sure offsets on the non-link side are not removed. -// CHECK-LABEL: func.func @ensure_no_removal_of_offsets -// CHECK: %[[CONNECTION0:.+]] = amdaie.connection -// CHECK: %[[CONNECTION1:.+]] = amdaie.connection -// CHECK: amdaie.logicalobjectfifo.link[%[[CONNECTION0]]] -> [%[[CONNECTION1]]] () -// CHECK: amdaie.npu.circular_dma_cpy_nd %[[CONNECTION0]]([] [] [], [1] [1] [1024]) -// CHECK: amdaie.npu.circular_dma_cpy_nd %[[CONNECTION1]]([1] [1] [2048], [] [] []) -func.func @ensure_no_removal_of_offsets(%arg0: memref<32x1024xi32>, %arg1: memref<32x64xi32, 1>, %arg2: memref<2x8x8x4x8xi32, 2>) { - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<2x8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %3 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %4 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %5 = amdaie.npu.circular_dma_cpy_nd %3([] [] [], [1] [1] [1024]) - %6 = amdaie.npu.circular_dma_cpy_nd %4([1] [1] [2048], [] [] []) - return -} - -// ----- - -func.func @link_different_blocks(%arg0: memref<32x1024xi32>, %arg1: memref<32x64xi32, 1>, %arg2: memref<8x8x4x8xi32, 2>) { - %0 = amdaie.logicalobjectfifo.from_memref %arg0, {} :memref<32x1024xi32> -> !amdaie.logicalobjectfifo> - // expected-error @+2 {{has copy-like users not residing in the same block}} - // expected-error @+1 {{couldn't create a link operation}} - %1 = amdaie.logicalobjectfifo.from_memref %arg1, {} : memref<32x64xi32, 1> -> !amdaie.logicalobjectfifo> - %2 = amdaie.logicalobjectfifo.from_memref %arg2, {} : memref<8x8x4x8xi32, 2> -> !amdaie.logicalobjectfifo> - %3 = amdaie.connection(%1, %0) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %4 = amdaie.npu.circular_dma_cpy_nd %3([] [] [], [] [] []) - scf.forall (%arg3, %arg4) in (1, 2) { - %5 = amdaie.connection(%2, %1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>) - %6 = amdaie.npu.circular_dma_cpy_nd %5([] [] [], [] [] []) - } - return -}