diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECanonicalizeDma.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECanonicalizeDma.cpp deleted file mode 100644 index c5e9efc30..000000000 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECanonicalizeDma.cpp +++ /dev/null @@ -1,166 +0,0 @@ -// Copyright 2024 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "air/Dialect/AIR/AIRDialect.h" -#include "iree-amd-aie/Transforms/AMDAIEDmaUtils.h" -#include "iree-amd-aie/Transforms/Passes.h" -#include "mlir/Dialect/Arith/Utils/Utils.h" -#include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" - -#define DEBUG_TYPE "iree-amdaie-canonicalize-dma" - -namespace mlir::iree_compiler::AMDAIE { - -namespace { - -class FoldUnitDimsInDma : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(xilinx::air::DmaMemcpyNdOp op, - PatternRewriter &rewriter) const override { - Location loc = op->getLoc(); - - SmallVector srcOffsets = - getAsOpFoldResult(op.getSrcOffsets()); - SmallVector dstOffsets = - getAsOpFoldResult(op.getDstOffsets()); - - SmallVector srcStrides = - getAsOpFoldResult(op.getSrcStrides()); - SmallVector dstStrides = - getAsOpFoldResult(op.getDstStrides()); - - SmallVector srcSizes = getAsOpFoldResult(op.getSrcSizes()); - SmallVector dstSizes = getAsOpFoldResult(op.getDstSizes()); - - SmallVector newSrcOffsets, newDstOffsets, newSrcStrides, - newDstStrides, newSrcSizes, newDstSizes; - - // We do not make any assumptions when all offsets are not - // specified and dont change the op in that case. - if (srcStrides.size() != srcOffsets.size() || - dstStrides.size() != dstOffsets.size()) { - return rewriter.notifyMatchFailure( - op, "offset dimensions dont match stride dimensions"); - } - // Fold source dims. - LogicalResult foldableUnitDimsFoundInSrc = - foldUnitDims(srcOffsets, srcSizes, srcStrides, newSrcOffsets, - newSrcSizes, newSrcStrides); - // Fold destination dims. - LogicalResult foldableUnitDimsFoundInDst = - foldUnitDims(dstOffsets, dstSizes, dstStrides, newDstOffsets, - newDstSizes, newDstStrides); - if (failed(foldableUnitDimsFoundInSrc) && - failed(foldableUnitDimsFoundInDst)) { - return rewriter.notifyMatchFailure(op, "no foldable unit dims found"); - } - - rewriter.replaceOpWithNewOp( - op, SmallVector{}, op.getAsyncDependencies(), op.getDst(), - getValueOrCreateConstantIndexOp(rewriter, loc, newDstOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newDstSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newDstStrides), - op.getSrc(), - getValueOrCreateConstantIndexOp(rewriter, loc, newSrcOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newSrcSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newSrcStrides)); - return success(); - } -}; - -class FoldLinearDimsInDma - : public OpRewritePattern { - public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(xilinx::air::DmaMemcpyNdOp op, - PatternRewriter &rewriter) const override { - Location loc = op->getLoc(); - - SmallVector srcOffsets = - getAsOpFoldResult(op.getSrcOffsets()); - SmallVector dstOffsets = - getAsOpFoldResult(op.getDstOffsets()); - - SmallVector srcStrides = - getAsOpFoldResult(op.getSrcStrides()); - SmallVector dstStrides = - getAsOpFoldResult(op.getDstStrides()); - - SmallVector srcSizes = getAsOpFoldResult(op.getSrcSizes()); - SmallVector dstSizes = getAsOpFoldResult(op.getDstSizes()); - - SmallVector newSrcOffsets, newDstOffsets, newSrcStrides, - newDstStrides, newSrcSizes, newDstSizes; - - // We do not make any assumptions when all offsets are not - // specified and dont change the op in that case. - if (srcStrides.size() != srcOffsets.size() || - dstStrides.size() != dstOffsets.size()) { - return rewriter.notifyMatchFailure( - op, "offset dimensions dont match stride dimensions"); - } - - // Fold source dims. - LogicalResult foldableLinearDimsFoundInSrc = - foldLinearDims(op.getContext(), srcOffsets, srcSizes, srcStrides, - newSrcOffsets, newSrcSizes, newSrcStrides); - // Fold destination dims. - LogicalResult foldableLinearDimsFoundInDst = - foldLinearDims(op.getContext(), dstOffsets, dstSizes, dstStrides, - newDstOffsets, newDstSizes, newDstStrides); - if (failed(foldableLinearDimsFoundInSrc) && - failed(foldableLinearDimsFoundInDst)) { - return rewriter.notifyMatchFailure(op, "no foldable linear dims found"); - } - - rewriter.replaceOpWithNewOp( - op, SmallVector{}, op.getAsyncDependencies(), op.getDst(), - getValueOrCreateConstantIndexOp(rewriter, loc, newDstOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newDstSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newDstStrides), - op.getSrc(), - getValueOrCreateConstantIndexOp(rewriter, loc, newSrcOffsets), - getValueOrCreateConstantIndexOp(rewriter, loc, newSrcSizes), - getValueOrCreateConstantIndexOp(rewriter, loc, newSrcStrides)); - return success(); - } -}; - -class AMDAIECanonicalizeDmaPass - : public impl::AMDAIECanonicalizeDmaBase { - public: - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - - AMDAIECanonicalizeDmaPass() = default; - AMDAIECanonicalizeDmaPass(const AMDAIECanonicalizeDmaPass &pass){}; - void runOnOperation() override; -}; - -void AMDAIECanonicalizeDmaPass::runOnOperation() { - MLIRContext *context = &getContext(); - RewritePatternSet patterns(context); - patterns.insert(context); - if (failed( - applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)))) { - return signalPassFailure(); - } -} - -} // namespace - -std::unique_ptr createAMDAIECanonicalizeDmaPass() { - return std::make_unique(); -} -} // namespace mlir::iree_compiler::AMDAIE diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt index 2979c71ef..eb32b3e18 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/CMakeLists.txt @@ -50,7 +50,6 @@ iree_cc_library( "AMDAIEAssignLogicalObjectFifoDepth.cpp" "AMDAIEAssignNpuDmaBdIds.cpp" "AMDAIEBufferizeToAllocation.cpp" - "AMDAIECanonicalizeDma.cpp" "AMDAIECanonicalizeNpuDmaCpyNd.cpp" "AMDAIECanonicalizeDoublyStridedOp.cpp" "AMDAIECombineStridedOps.cpp" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h index 8912db52d..bb7062a22 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/PassDetail.h @@ -27,7 +27,6 @@ namespace mlir::iree_compiler::AMDAIE { #define GEN_PASS_DEF_AMDAIEASSIGNNPUDMABDIDS #define GEN_PASS_DEF_AMDAIEBRIDGETOAIR #define GEN_PASS_DEF_AMDAIEBUFFERIZETOALLOCATION -#define GEN_PASS_DEF_AMDAIECANONICALIZEDMA #define GEN_PASS_DEF_AMDAIECANONICALIZEDOUBLYSTRIDEDOP #define GEN_PASS_DEF_AMDAIECANONICALIZENPUDMACPYND #define GEN_PASS_DEF_AMDAIECLEANUP diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp index fa2f73482..1d0cf8981 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp @@ -670,11 +670,7 @@ void addMLIRAIRLoweringPasses(OpPassManager &passManager, AMDAIEDevice device) { passManager.addPass(createCanonicalizerPass()); } - // TODO (Erwei): Figure out a way to work with AMDAIEPackToDmaPass. - if (clUseTilePipeline == TilePassPipeline::PackPeelPipeline) - passManager.addPass(createAMDAIEDecomposeLinalgExtPackUnPackToAIRPass()); - else - passManager.addPass(createAMDAIEPackToDmaPass()); + passManager.addPass(createAMDAIEDecomposeLinalgExtPackUnPackToAIRPass()); // TODO(newling) adding createCanonicalizerPass introduces a dma copy lowering // failure. Understand and fix. @@ -691,7 +687,6 @@ void addMLIRAIRLoweringPasses(OpPassManager &passManager, AMDAIEDevice device) { } passManager.addPass(createCanonicalizerPass()); passManager.addPass(createCSEPass()); - passManager.addPass(createAMDAIECanonicalizeDmaPass()); passManager.addPass(xilinx::air::createCopyToDmaPass()); passManager.addPass(createCanonicalizerPass()); passManager.addPass(createCSEPass()); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h index fe5670067..f6e8dd1a5 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.h @@ -77,9 +77,6 @@ std::unique_ptr createAMDAIEBridgeToAIRPass(); std::unique_ptr createAMDAIEBufferizeToAllocationPass( AMDAIEBufferizeToAllocationOptions options = {}); -/// Create pass to apply canonicalization to air.dma_memcpy_nd op's. -std::unique_ptr createAMDAIECanonicalizeDmaPass(); - /// Create pass to canonicalize `amdaie.npu.dma_cpy_nd` operations. std::unique_ptr createAMDAIECanonicalizeNpuDmaCpyNdPass(); diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td index 73ceee040..bc23a911f 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td @@ -82,12 +82,6 @@ def AMDAIEBufferizeToAllocation : ]; } -def AMDAIECanonicalizeDma : - Pass<"iree-amdaie-canonicalize-dma", ""> { - let summary = "Apply caonicaliztions to air.dma_memcpy_nd op's"; - let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIECanonicalizeDmaPass()"; -} - def AMDAIECanonicalizeDoublyStridedOp : Pass<"iree-amdaie-canonicalize-doubly-strided-op", ""> { let summary = "Canonicalize doubly strided DMA operations."; @@ -98,7 +92,6 @@ def AMDAIECanonicalizeDoublyStridedOp : ]; } - def AMDAIECanonicalizeNpuDmaCpyNd : Pass<"iree-amdaie-canonicalize-npu-dma-cpy-nd", "ModuleOp"> { let summary = "Canonicalize npu.dma_cpy_nd operations."; diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt index 9be76765b..219443e85 100644 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt +++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/CMakeLists.txt @@ -16,7 +16,6 @@ iree_lit_test_suite( "assign_npu_dma_bd_ids.mlir" "bridge_to_air.mlir" "bufferize_to_allocation.mlir" - "canonicalize_dma.mlir" "canonicalize_doubly_strided_op.mlir" "canonicalize_npu_dma_cpy_nd.mlir" "combine_strided_ops.mlir" diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/canonicalize_dma.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/canonicalize_dma.mlir deleted file mode 100644 index b6d19d148..000000000 --- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/canonicalize_dma.mlir +++ /dev/null @@ -1,18 +0,0 @@ - // RUN: iree-opt --pass-pipeline="builtin.module(iree-amdaie-canonicalize-dma)" %s | FileCheck %s - // CHECK-LABEL: @canonicalize_dma - func.func @canonicalize_dma() { - %c16 = arith.constant 16 : index - %c32 = arith.constant 32 : index - %c64 = arith.constant 64 : index - %c128 = arith.constant 128 : index - %c8 = arith.constant 8 : index - %c4 = arith.constant 4 : index - %c2 = arith.constant 2 : index - %c1 = arith.constant 1 : index - %c0 = arith.constant 0 : index - %alloc = memref.alloc() : memref<1x1x8x16xi32, 1> - %alloc_0 = memref.alloc() : memref<1x1x2x2x4x8xi32, 2> - // CHECK: air.dma_memcpy_nd (%alloc_0[%c0] [%c128] [%c1], %alloc[%c0, %c0, %c0] [%c2, %c8, %c8] [%c8, %c16, %c1]) : (memref<1x1x2x2x4x8xi32, 2>, memref<1x1x8x16xi32, 1>) - air.dma_memcpy_nd (%alloc_0[%c0, %c0, %c0, %c0, %c0, %c0] [%c1, %c1, %c2, %c2, %c4, %c8] [%c128, %c128, %c64, %c32, %c8, %c1], %alloc[%c0, %c0, %c0, %c0, %c0, %c0] [%c1, %c1, %c2, %c2, %c4, %c8] [%c128, %c128, %c8, %c64, %c16, %c1]) : (memref<1x1x2x2x4x8xi32, 2>, memref<1x1x8x16xi32, 1>) - return - }