ObjFIFO Multi-Dimensional Patch (#692)

* Allow objFIFO link transforms on distribute with no bcast * Allow nd DMA for types other than i32 * Add tests * clang format * Header name fix * Remove restriction of 1xi32 for memrefs in ND DMABDs * clang format
Xilinx · Oct 23, 2023 · 6b1c08d · 6b1c08d
1 parent 859220b
commit 6b1c08d
Show file tree

Hide file tree

Showing 6 changed files with 187 additions and 93 deletions.
diff --git a/lib/Dialect/AIE/IR/AIEDialect.cpp b/lib/Dialect/AIE/IR/AIEDialect.cpp
@@ -582,9 +582,10 @@ LogicalResult xilinx::AIE::ObjectFifoLinkOp::verify() {
 
     int outputSize = 0;
     for (auto fifoOut : getOutputObjectFifos()) {
-      if (fifoOut.getDimensionsToStream().size() > 0) {
+      if ((fifoOut.getDimensionsToStream().size() > 0) &&
+          (fifoOut.getConsumerTiles().size() > 1)) {
         return emitOpError("currently does not support objectFifos with "
-                           "dimensionsToStream.");
+                           "dimensionsToStream and multiple consumers.");
       }
       for (auto dims : fifoOut.getDimensionsFromStreamPerConsumer()) {
         if (dims.size() > 0)
@@ -1337,18 +1338,11 @@ LogicalResult xilinx::AIE::DMABDOp::verify() {
   // The following checks only apply if non-default strides/wraps are defined.
   if (getDimensions()) {
     ::mlir::MemRefType buffer = getBuffer().getType();
-    // We are restrictive about the type of the memref used as the input address
+    // We are not restrictive about the type of the memref used as the input
     // to the DMABD when used with multi-dimensional strides/wraps. Since the
     // BD will use the memref as a base address and copy from it in 32 bit
-    // chunks, while assuming the layout of the memref is contiguous, we
-    // disallow anything whose elemental size is not 32 bits, or where we
-    // cannot verify that the layout is contiguous.
-    if (!buffer.getElementType().isInteger(32) || buffer.getRank() > 1 ||
-        !buffer.getLayout().isIdentity()) {
-      return emitOpError() << "Specifying transfer step sizes and wraps is only"
-                              " supported for one-dimensional memrefs of 32 bit"
-                              " integer elements.";
-    }
+    // chunks, while assuming the layout of the memref is contiguous, we
+    // assume the user/compiler understands and accounts for this.
     uint64_t memref_size = 1; // in bytes
     uint64_t max_idx = 0;
     for (int64_t memref_dim : buffer.getShape()) {

diff --git a/test/dialect/AIE/nd-dma-wrong-rank.mlir b/test/dialect/AIE/nd-dma-wrong-rank.mlir
diff --git a/test/dialect/AIE/nd-dma-wrong-type.mlir b/test/dialect/AIE/nd-dma-wrong-type.mlir
diff --git a/test/objectFifo-stateful-transform/nd_dma_distribute_AIE2.mlir b/test/objectFifo-stateful-transform/nd_dma_distribute_AIE2.mlir
@@ -8,7 +8,109 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: aie-opt --aie-objectFifo-stateful-transform --verify-diagnostics %s
+// RUN: aie-opt --aie-objectFifo-stateful-transform %s
+
+// CHECK: module @ndDMAObjFifoAIE2 {
+// CHECK:   AIE.device(xcve2302) {
+// CHECK:     memref.global "public" @of2_cons : memref<128xi32>
+// CHECK:     memref.global "public" @of2 : memref<128xi32>
+// CHECK:     memref.global "public" @of1_cons : memref<128xi32>
+// CHECK:     memref.global "public" @of1 : memref<128xi32>
+// CHECK:     memref.global "public" @of0_cons : memref<256xi32>
+// CHECK:     memref.global "public" @of0 : memref<256xi32>
+// CHECK:     %[[tile_1_0:.*]] = AIE.tile(1, 0)
+// CHECK:     %[[tile_1_1:.*]] = AIE.tile(1, 1)
+// CHECK:     %[[tile_2_2:.*]] = AIE.tile(2, 2)
+// CHECK:     %[[tile_2_3:.*]] = AIE.tile(2, 3)
+// CHECK:     %[[of2_cons_buf_0:.*]] = AIE.buffer(%[[tile_2_3:.*]]) {sym_name = "of2_cons_buff_0"} : memref<128xi32>
+// CHECK:     %[[of2_cons_buf_1:.*]] = AIE.buffer(%[[tile_2_3:.*]]) {sym_name = "of2_cons_buff_1"} : memref<128xi32>
+// CHECK:     %[[of2_cons_prod_lock:.*]] = AIE.lock(%[[tile_2_3:.*]], 0) {init = 2 : i32, sym_name = "of2_cons_prod_lock"}
+// CHECK:     %[[of2_cons_cons_lock:.*]] = AIE.lock(%[[tile_2_3:.*]], 1) {init = 0 : i32, sym_name = "of2_cons_cons_lock"}
+// CHECK:     %[[of1_cons_buf_0:.*]] = AIE.buffer(%[[tile_2_2:.*]]) {sym_name = "of1_cons_buff_0"} : memref<128xi32>
+// CHECK:     %[[of1_cons_buf_1:.*]] = AIE.buffer(%[[tile_2_2:.*]]) {sym_name = "of1_cons_buff_1"} : memref<128xi32>
+// CHECK:     %[[of1_cons_prod_lock:.*]] = AIE.lock(%[[tile_2_2:.*]], 0) {init = 2 : i32, sym_name = "of1_cons_prod_lock"}
+// CHECK:     %[[of1_cons_cons_lock:.*]] = AIE.lock(%[[tile_2_2:.*]], 1) {init = 0 : i32, sym_name = "of1_cons_cons_lock"}
+// CHECK:     %[[of0_cons_buf_0:.*]] = AIE.buffer(%[[tile_1_1:.*]]) {sym_name = "of0_cons_buff_0"} : memref<256xi32>
+// CHECK:     %[[of0_cons_buf_1:.*]] = AIE.buffer(%[[tile_1_1:.*]]) {sym_name = "of0_cons_buff_1"} : memref<256xi32>
+// CHECK:     %[[of0_cons_prod_lock:.*]] = AIE.lock(%[[tile_1_1:.*]], 0) {init = 4 : i32, sym_name = "of0_cons_prod_lock"}
+// CHECK:     %[[of0_cons_cons_lock:.*]] = AIE.lock(%[[tile_1_1:.*]], 1) {init = 0 : i32, sym_name = "of0_cons_cons_lock"}
+// CHECK:     %[[of0_prod_lock:.*]] = AIE.lock(%[[tile_1_0:.*]], 0) {init = 0 : i32, sym_name = "of0_prod_lock"}
+// CHECK:     %[[of0_cons_lock:.*]] = AIE.lock(%[[tile_1_0:.*]], 1) {init = 0 : i32, sym_name = "of0_cons_lock"}
+// CHECK:     AIE.flow(%[[tile_1_0:.*]], DMA : 0, %[[tile_1_1:.*]], DMA : 0)
+// CHECK:     AIE.flow(%[[tile_1_1:.*]], DMA : 0, %[[tile_2_2:.*]], DMA : 0)
+// CHECK:     AIE.flow(%[[tile_1_1:.*]], DMA : 1, %[[tile_2_3:.*]], DMA : 0)
+// CHECK:     AIE.shimDMAAllocation @of0(MM2S, 0, 1)
+// CHECK:     %18 = AIE.memTileDMA(%[[tile_1_1:.*]]) {
+// CHECK:       %21 = AIE.dmaStart(S2MM, 0, ^bb1, ^bb3)
+// CHECK:     ^bb1:  // 2 preds: ^bb0, ^bb2
+// CHECK:       AIE.useLock(%[[of0_cons_prod_lock:.*]], AcquireGreaterEqual, 2)
+// CHECK:       AIE.dmaBd(<%[[of0_cons_buf_0:.*]] : memref<256xi32>, 0, 256>, 0)
+// CHECK:       AIE.useLock(%[[of0_cons_cons_lock:.*]], Release, 2)
+// CHECK:       AIE.nextBd ^bb2
+// CHECK:     ^bb2:  // pred: ^bb1
+// CHECK:       AIE.useLock(%[[of0_cons_prod_lock:.*]], AcquireGreaterEqual, 2)
+// CHECK:       AIE.dmaBd(<%[[of0_cons_buf_1:.*]] : memref<256xi32>, 0, 256>, 0)
+// CHECK:       AIE.useLock(%[[of0_cons_cons_lock:.*]], Release, 2)
+// CHECK:       AIE.nextBd ^bb1
+// CHECK:     ^bb3:  // pred: ^bb0
+// CHECK:       %22 = AIE.dmaStart(MM2S, 0, ^bb4, ^bb6)
+// CHECK:     ^bb4:  // 2 preds: ^bb3, ^bb5
+// CHECK:       AIE.useLock(%[[of0_cons_cons_lock:.*]], AcquireGreaterEqual, 1)
+// CHECK:       AIE.dmaBd(<%[[of0_cons_buf_0:.*]] : memref<256xi32>, 0, 128>, 0, [<4, 64>, <2, 4>, <8, 8>, <4, 1>])
+// CHECK:       AIE.useLock(%[[of0_cons_prod_lock:.*]], Release, 1)
+// CHECK:       AIE.nextBd ^bb5
+// CHECK:     ^bb5:  // pred: ^bb4
+// CHECK:       AIE.useLock(%[[of0_cons_cons_lock:.*]], AcquireGreaterEqual, 1)
+// CHECK:       AIE.dmaBd(<%[[of0_cons_buf_1:.*]] : memref<256xi32>, 0, 128>, 0, [<4, 64>, <2, 4>, <8, 8>, <4, 1>])
+// CHECK:       AIE.useLock(%[[of0_cons_prod_lock:.*]], Release, 1)
+// CHECK:       AIE.nextBd ^bb4
+// CHECK:     ^bb6:  // pred: ^bb3
+// CHECK:       %23 = AIE.dmaStart(MM2S, 1, ^bb7, ^bb9)
+// CHECK:     ^bb7:  // 2 preds: ^bb6, ^bb8
+// CHECK:       AIE.useLock(%[[of0_cons_cons_lock:.*]], AcquireGreaterEqual, 1)
+// CHECK:       AIE.dmaBd(<%[[of0_cons_buf_0:.*]] : memref<256xi32>, 512, 128>, 0, [<4, 64>, <2, 4>, <8, 8>, <4, 1>])
+// CHECK:       AIE.useLock(%[[of0_cons_prod_lock:.*]], Release, 1)
+// CHECK:       AIE.nextBd ^bb8
+// CHECK:     ^bb8:  // pred: ^bb7
+// CHECK:       AIE.useLock(%[[of0_cons_cons_lock:.*]], AcquireGreaterEqual, 1)
+// CHECK:       AIE.dmaBd(<%[[of0_cons_buf_1:.*]] : memref<256xi32>, 512, 128>, 0, [<4, 64>, <2, 4>, <8, 8>, <4, 1>])
+// CHECK:       AIE.useLock(%[[of0_cons_prod_lock:.*]], Release, 1)
+// CHECK:       AIE.nextBd ^bb7
+// CHECK:     ^bb9:  // pred: ^bb6
+// CHECK:       AIE.end
+// CHECK:     }
+// CHECK:     %19 = AIE.mem(%[[tile_2_2:.*]]) {
+// CHECK:       %21 = AIE.dmaStart(S2MM, 0, ^bb1, ^bb3)
+// CHECK:     ^bb1:  // 2 preds: ^bb0, ^bb2
+// CHECK:       AIE.useLock(%[[of1_cons_prod_lock:.*]], AcquireGreaterEqual, 1)
+// CHECK:       AIE.dmaBd(<%[[of1_cons_buf_0:.*]] : memref<128xi32>, 0, 128>, 0)
+// CHECK:       AIE.useLock(%[[of1_cons_cons_lock:.*]], Release, 1)
+// CHECK:       AIE.nextBd ^bb2
+// CHECK:     ^bb2:  // pred: ^bb1
+// CHECK:       AIE.useLock(%[[of1_cons_prod_lock:.*]], AcquireGreaterEqual, 1)
+// CHECK:       AIE.dmaBd(<%[[of1_cons_buf_1:.*]] : memref<128xi32>, 0, 128>, 0)
+// CHECK:       AIE.useLock(%[[of1_cons_cons_lock:.*]], Release, 1)
+// CHECK:       AIE.nextBd ^bb1
+// CHECK:     ^bb3:  // pred: ^bb0
+// CHECK:       AIE.end
+// CHECK:     }
+// CHECK:     %20 = AIE.mem(%[[tile_2_3:.*]]) {
+// CHECK:       %21 = AIE.dmaStart(S2MM, 0, ^bb1, ^bb3)
+// CHECK:     ^bb1:  // 2 preds: ^bb0, ^bb2
+// CHECK:       AIE.useLock(%[[of2_cons_prod_lock:.*]], AcquireGreaterEqual, 1)
+// CHECK:       AIE.dmaBd(<%[[of2_cons_buf_0:.*]] : memref<128xi32>, 0, 128>, 0)
+// CHECK:       AIE.useLock(%[[of2_cons_cons_lock:.*]], Release, 1)
+// CHECK:       AIE.nextBd ^bb2
+// CHECK:     ^bb2:  // pred: ^bb1
+// CHECK:       AIE.useLock(%[[of2_cons_prod_lock:.*]], AcquireGreaterEqual, 1)
+// CHECK:       AIE.dmaBd(<%[[of2_cons_buf_1:.*]] : memref<128xi32>, 0, 128>, 0)
+// CHECK:       AIE.useLock(%[[of2_cons_cons_lock:.*]], Release, 1)
+// CHECK:       AIE.nextBd ^bb1
+// CHECK:     ^bb3:  // pred: ^bb0
+// CHECK:       AIE.end
+// CHECK:     }
+// CHECK:   }
+// CHECK: }
 
 module @ndDMAObjFifoAIE2 {
  AIE.device(xcve2302) {
@@ -17,8 +119,7 @@ module @ndDMAObjFifoAIE2 {
     %tile22 = AIE.tile(2, 2)
     %tile23 = AIE.tile(2, 3)
 
-    AIE.objectFifo @of0 (%tile10, {%tile11 fromStream [<32, 16>,
-                                                       < 8,  1>]}, 
+    AIE.objectFifo @of0 (%tile10, {%tile11}, 
                          2 : i32) : !AIE.objectFifo<memref<256xi32>>
 
     AIE.objectFifo @of1 (%tile11 toStream [< 4,64>,

diff --git a/test/objectFifo-stateful-transform/nd_dma_distribute_AIE2_bad.mlir b/test/objectFifo-stateful-transform/nd_dma_distribute_AIE2_bad.mlir
@@ -0,0 +1,38 @@
+//===- nd_dma_distribute_AIE2_bad.mlir -------------------------*- MLIR -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (C) 2023, Advanced Micro Devices, Inc.
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: aie-opt --aie-objectFifo-stateful-transform --verify-diagnostics %s
+
+module @ndDMAObjFifoAIE2 {
+ AIE.device(xcve2302) {
+    %tile10 = AIE.tile(1, 0)
+    %tile11 = AIE.tile(1, 1)
+    %tile22 = AIE.tile(2, 2)
+    %tile23 = AIE.tile(2, 3)
+
+    AIE.objectFifo @of0 (%tile10, {%tile11 fromStream [<32, 16>,
+                                                       < 8,  1>]}, 
+                         2 : i32) : !AIE.objectFifo<memref<256xi32>>
+
+    AIE.objectFifo @of1 (%tile11 toStream [< 4,64>,
+                                           < 2, 4>, 
+                                           < 8, 8>, 
+                                           < 4, 1>],
+                        {%tile22}, 2 : i32) : !AIE.objectFifo<memref<128xi32>>
+
+    AIE.objectFifo @of2 (%tile11 toStream [< 4,64>,
+                                           < 2, 4>, 
+                                           < 8, 8>, 
+                                           < 4, 1>],
+                        {%tile23}, 2 : i32) : !AIE.objectFifo<memref<128xi32>>
+   // expected-error@+1 {{'AIE.objectFifo.link' op currently does not support objectFifos with dimensionsFromStreamPerConsumer.}}
+   AIE.objectFifo.link [ @of0 ] -> [ @of1, @of2 ] ()
+ }
+}
diff --git a/test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2_bad.mlir b/test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2_bad.mlir
@@ -0,0 +1,39 @@
+//===- nd_dma_distribute_broadcast_AIE2_bad.mlir ---------------*- MLIR -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Copyright (C) 2023, Advanced Micro Devices, Inc.
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: aie-opt --aie-objectFifo-stateful-transform --verify-diagnostics %s
+
+module @ndDMAObjFifoAIE2 {
+ AIE.device(xcve2302) {
+    %tile10 = AIE.tile(1, 0)
+    %tile11 = AIE.tile(1, 1)
+    %tile12 = AIE.tile(1, 2)
+    %tile22 = AIE.tile(2, 2)
+    %tile13 = AIE.tile(1, 3)
+    %tile23 = AIE.tile(2, 3)
+
+    AIE.objectFifo @of0 (%tile10, {%tile11}, 
+                         2 : i32) : !AIE.objectFifo<memref<256xi32>>
+
+    AIE.objectFifo @of1 (%tile11 toStream [< 4,64>,
+                                           < 2, 4>, 
+                                           < 8, 8>, 
+                                           < 4, 1>],
+                        {%tile12, %tile22}, 2 : i32) : !AIE.objectFifo<memref<128xi32>>
+
+    AIE.objectFifo @of2 (%tile11 toStream [< 4,64>,
+                                           < 2, 4>, 
+                                           < 8, 8>, 
+                                           < 4, 1>],
+                        {%tile13, %tile23}, 2 : i32) : !AIE.objectFifo<memref<128xi32>>
+   // expected-error@+1 {{'AIE.objectFifo.link' op currently does not support objectFifos with dimensionsToStream and multiple consumers.}}
+   AIE.objectFifo.link [ @of0 ] -> [ @of1, @of2 ] ()
+ }
+}