Skip to content

Commit

Permalink
ObjFIFO Multi-Dimensional Patch (#692)
Browse files Browse the repository at this point in the history
* Allow objFIFO link transforms on distribute with no bcast

* Allow nd DMA for types other than i32

* Add tests

* clang format

* Header name fix

* Remove restriction of 1xi32 for memrefs in ND DMABDs

* clang format
  • Loading branch information
jgmelber authored Oct 23, 2023
1 parent 859220b commit 6b1c08d
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 93 deletions.
18 changes: 6 additions & 12 deletions lib/Dialect/AIE/IR/AIEDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,9 +582,10 @@ LogicalResult xilinx::AIE::ObjectFifoLinkOp::verify() {

int outputSize = 0;
for (auto fifoOut : getOutputObjectFifos()) {
if (fifoOut.getDimensionsToStream().size() > 0) {
if ((fifoOut.getDimensionsToStream().size() > 0) &&
(fifoOut.getConsumerTiles().size() > 1)) {
return emitOpError("currently does not support objectFifos with "
"dimensionsToStream.");
"dimensionsToStream and multiple consumers.");
}
for (auto dims : fifoOut.getDimensionsFromStreamPerConsumer()) {
if (dims.size() > 0)
Expand Down Expand Up @@ -1337,18 +1338,11 @@ LogicalResult xilinx::AIE::DMABDOp::verify() {
// The following checks only apply if non-default strides/wraps are defined.
if (getDimensions()) {
::mlir::MemRefType buffer = getBuffer().getType();
// We are restrictive about the type of the memref used as the input address
// We are not restrictive about the type of the memref used as the input
// to the DMABD when used with multi-dimensional strides/wraps. Since the
// BD will use the memref as a base address and copy from it in 32 bit
// chunks, while assuming the layout of the memref is contiguous, we
// disallow anything whose elemental size is not 32 bits, or where we
// cannot verify that the layout is contiguous.
if (!buffer.getElementType().isInteger(32) || buffer.getRank() > 1 ||
!buffer.getLayout().isIdentity()) {
return emitOpError() << "Specifying transfer step sizes and wraps is only"
" supported for one-dimensional memrefs of 32 bit"
" integer elements.";
}
// chunks, while assuming the layout of the memref is contiguous, we
// assume the user/compiler understands and accounts for this.
uint64_t memref_size = 1; // in bytes
uint64_t max_idx = 0;
for (int64_t memref_dim : buffer.getShape()) {
Expand Down
38 changes: 0 additions & 38 deletions test/dialect/AIE/nd-dma-wrong-rank.mlir

This file was deleted.

40 changes: 0 additions & 40 deletions test/dialect/AIE/nd-dma-wrong-type.mlir

This file was deleted.

107 changes: 104 additions & 3 deletions test/objectFifo-stateful-transform/nd_dma_distribute_AIE2.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,109 @@
//
//===----------------------------------------------------------------------===//

// RUN: aie-opt --aie-objectFifo-stateful-transform --verify-diagnostics %s
// RUN: aie-opt --aie-objectFifo-stateful-transform %s

// CHECK: module @ndDMAObjFifoAIE2 {
// CHECK: AIE.device(xcve2302) {
// CHECK: memref.global "public" @of2_cons : memref<128xi32>
// CHECK: memref.global "public" @of2 : memref<128xi32>
// CHECK: memref.global "public" @of1_cons : memref<128xi32>
// CHECK: memref.global "public" @of1 : memref<128xi32>
// CHECK: memref.global "public" @of0_cons : memref<256xi32>
// CHECK: memref.global "public" @of0 : memref<256xi32>
// CHECK: %[[tile_1_0:.*]] = AIE.tile(1, 0)
// CHECK: %[[tile_1_1:.*]] = AIE.tile(1, 1)
// CHECK: %[[tile_2_2:.*]] = AIE.tile(2, 2)
// CHECK: %[[tile_2_3:.*]] = AIE.tile(2, 3)
// CHECK: %[[of2_cons_buf_0:.*]] = AIE.buffer(%[[tile_2_3:.*]]) {sym_name = "of2_cons_buff_0"} : memref<128xi32>
// CHECK: %[[of2_cons_buf_1:.*]] = AIE.buffer(%[[tile_2_3:.*]]) {sym_name = "of2_cons_buff_1"} : memref<128xi32>
// CHECK: %[[of2_cons_prod_lock:.*]] = AIE.lock(%[[tile_2_3:.*]], 0) {init = 2 : i32, sym_name = "of2_cons_prod_lock"}
// CHECK: %[[of2_cons_cons_lock:.*]] = AIE.lock(%[[tile_2_3:.*]], 1) {init = 0 : i32, sym_name = "of2_cons_cons_lock"}
// CHECK: %[[of1_cons_buf_0:.*]] = AIE.buffer(%[[tile_2_2:.*]]) {sym_name = "of1_cons_buff_0"} : memref<128xi32>
// CHECK: %[[of1_cons_buf_1:.*]] = AIE.buffer(%[[tile_2_2:.*]]) {sym_name = "of1_cons_buff_1"} : memref<128xi32>
// CHECK: %[[of1_cons_prod_lock:.*]] = AIE.lock(%[[tile_2_2:.*]], 0) {init = 2 : i32, sym_name = "of1_cons_prod_lock"}
// CHECK: %[[of1_cons_cons_lock:.*]] = AIE.lock(%[[tile_2_2:.*]], 1) {init = 0 : i32, sym_name = "of1_cons_cons_lock"}
// CHECK: %[[of0_cons_buf_0:.*]] = AIE.buffer(%[[tile_1_1:.*]]) {sym_name = "of0_cons_buff_0"} : memref<256xi32>
// CHECK: %[[of0_cons_buf_1:.*]] = AIE.buffer(%[[tile_1_1:.*]]) {sym_name = "of0_cons_buff_1"} : memref<256xi32>
// CHECK: %[[of0_cons_prod_lock:.*]] = AIE.lock(%[[tile_1_1:.*]], 0) {init = 4 : i32, sym_name = "of0_cons_prod_lock"}
// CHECK: %[[of0_cons_cons_lock:.*]] = AIE.lock(%[[tile_1_1:.*]], 1) {init = 0 : i32, sym_name = "of0_cons_cons_lock"}
// CHECK: %[[of0_prod_lock:.*]] = AIE.lock(%[[tile_1_0:.*]], 0) {init = 0 : i32, sym_name = "of0_prod_lock"}
// CHECK: %[[of0_cons_lock:.*]] = AIE.lock(%[[tile_1_0:.*]], 1) {init = 0 : i32, sym_name = "of0_cons_lock"}
// CHECK: AIE.flow(%[[tile_1_0:.*]], DMA : 0, %[[tile_1_1:.*]], DMA : 0)
// CHECK: AIE.flow(%[[tile_1_1:.*]], DMA : 0, %[[tile_2_2:.*]], DMA : 0)
// CHECK: AIE.flow(%[[tile_1_1:.*]], DMA : 1, %[[tile_2_3:.*]], DMA : 0)
// CHECK: AIE.shimDMAAllocation @of0(MM2S, 0, 1)
// CHECK: %18 = AIE.memTileDMA(%[[tile_1_1:.*]]) {
// CHECK: %21 = AIE.dmaStart(S2MM, 0, ^bb1, ^bb3)
// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2
// CHECK: AIE.useLock(%[[of0_cons_prod_lock:.*]], AcquireGreaterEqual, 2)
// CHECK: AIE.dmaBd(<%[[of0_cons_buf_0:.*]] : memref<256xi32>, 0, 256>, 0)
// CHECK: AIE.useLock(%[[of0_cons_cons_lock:.*]], Release, 2)
// CHECK: AIE.nextBd ^bb2
// CHECK: ^bb2: // pred: ^bb1
// CHECK: AIE.useLock(%[[of0_cons_prod_lock:.*]], AcquireGreaterEqual, 2)
// CHECK: AIE.dmaBd(<%[[of0_cons_buf_1:.*]] : memref<256xi32>, 0, 256>, 0)
// CHECK: AIE.useLock(%[[of0_cons_cons_lock:.*]], Release, 2)
// CHECK: AIE.nextBd ^bb1
// CHECK: ^bb3: // pred: ^bb0
// CHECK: %22 = AIE.dmaStart(MM2S, 0, ^bb4, ^bb6)
// CHECK: ^bb4: // 2 preds: ^bb3, ^bb5
// CHECK: AIE.useLock(%[[of0_cons_cons_lock:.*]], AcquireGreaterEqual, 1)
// CHECK: AIE.dmaBd(<%[[of0_cons_buf_0:.*]] : memref<256xi32>, 0, 128>, 0, [<4, 64>, <2, 4>, <8, 8>, <4, 1>])
// CHECK: AIE.useLock(%[[of0_cons_prod_lock:.*]], Release, 1)
// CHECK: AIE.nextBd ^bb5
// CHECK: ^bb5: // pred: ^bb4
// CHECK: AIE.useLock(%[[of0_cons_cons_lock:.*]], AcquireGreaterEqual, 1)
// CHECK: AIE.dmaBd(<%[[of0_cons_buf_1:.*]] : memref<256xi32>, 0, 128>, 0, [<4, 64>, <2, 4>, <8, 8>, <4, 1>])
// CHECK: AIE.useLock(%[[of0_cons_prod_lock:.*]], Release, 1)
// CHECK: AIE.nextBd ^bb4
// CHECK: ^bb6: // pred: ^bb3
// CHECK: %23 = AIE.dmaStart(MM2S, 1, ^bb7, ^bb9)
// CHECK: ^bb7: // 2 preds: ^bb6, ^bb8
// CHECK: AIE.useLock(%[[of0_cons_cons_lock:.*]], AcquireGreaterEqual, 1)
// CHECK: AIE.dmaBd(<%[[of0_cons_buf_0:.*]] : memref<256xi32>, 512, 128>, 0, [<4, 64>, <2, 4>, <8, 8>, <4, 1>])
// CHECK: AIE.useLock(%[[of0_cons_prod_lock:.*]], Release, 1)
// CHECK: AIE.nextBd ^bb8
// CHECK: ^bb8: // pred: ^bb7
// CHECK: AIE.useLock(%[[of0_cons_cons_lock:.*]], AcquireGreaterEqual, 1)
// CHECK: AIE.dmaBd(<%[[of0_cons_buf_1:.*]] : memref<256xi32>, 512, 128>, 0, [<4, 64>, <2, 4>, <8, 8>, <4, 1>])
// CHECK: AIE.useLock(%[[of0_cons_prod_lock:.*]], Release, 1)
// CHECK: AIE.nextBd ^bb7
// CHECK: ^bb9: // pred: ^bb6
// CHECK: AIE.end
// CHECK: }
// CHECK: %19 = AIE.mem(%[[tile_2_2:.*]]) {
// CHECK: %21 = AIE.dmaStart(S2MM, 0, ^bb1, ^bb3)
// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2
// CHECK: AIE.useLock(%[[of1_cons_prod_lock:.*]], AcquireGreaterEqual, 1)
// CHECK: AIE.dmaBd(<%[[of1_cons_buf_0:.*]] : memref<128xi32>, 0, 128>, 0)
// CHECK: AIE.useLock(%[[of1_cons_cons_lock:.*]], Release, 1)
// CHECK: AIE.nextBd ^bb2
// CHECK: ^bb2: // pred: ^bb1
// CHECK: AIE.useLock(%[[of1_cons_prod_lock:.*]], AcquireGreaterEqual, 1)
// CHECK: AIE.dmaBd(<%[[of1_cons_buf_1:.*]] : memref<128xi32>, 0, 128>, 0)
// CHECK: AIE.useLock(%[[of1_cons_cons_lock:.*]], Release, 1)
// CHECK: AIE.nextBd ^bb1
// CHECK: ^bb3: // pred: ^bb0
// CHECK: AIE.end
// CHECK: }
// CHECK: %20 = AIE.mem(%[[tile_2_3:.*]]) {
// CHECK: %21 = AIE.dmaStart(S2MM, 0, ^bb1, ^bb3)
// CHECK: ^bb1: // 2 preds: ^bb0, ^bb2
// CHECK: AIE.useLock(%[[of2_cons_prod_lock:.*]], AcquireGreaterEqual, 1)
// CHECK: AIE.dmaBd(<%[[of2_cons_buf_0:.*]] : memref<128xi32>, 0, 128>, 0)
// CHECK: AIE.useLock(%[[of2_cons_cons_lock:.*]], Release, 1)
// CHECK: AIE.nextBd ^bb2
// CHECK: ^bb2: // pred: ^bb1
// CHECK: AIE.useLock(%[[of2_cons_prod_lock:.*]], AcquireGreaterEqual, 1)
// CHECK: AIE.dmaBd(<%[[of2_cons_buf_1:.*]] : memref<128xi32>, 0, 128>, 0)
// CHECK: AIE.useLock(%[[of2_cons_cons_lock:.*]], Release, 1)
// CHECK: AIE.nextBd ^bb1
// CHECK: ^bb3: // pred: ^bb0
// CHECK: AIE.end
// CHECK: }
// CHECK: }
// CHECK: }

module @ndDMAObjFifoAIE2 {
AIE.device(xcve2302) {
Expand All @@ -17,8 +119,7 @@ module @ndDMAObjFifoAIE2 {
%tile22 = AIE.tile(2, 2)
%tile23 = AIE.tile(2, 3)

AIE.objectFifo @of0 (%tile10, {%tile11 fromStream [<32, 16>,
< 8, 1>]},
AIE.objectFifo @of0 (%tile10, {%tile11},
2 : i32) : !AIE.objectFifo<memref<256xi32>>

AIE.objectFifo @of1 (%tile11 toStream [< 4,64>,
Expand Down
38 changes: 38 additions & 0 deletions test/objectFifo-stateful-transform/nd_dma_distribute_AIE2_bad.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//===- nd_dma_distribute_AIE2_bad.mlir -------------------------*- MLIR -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2023, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//

// RUN: aie-opt --aie-objectFifo-stateful-transform --verify-diagnostics %s

// Negative test: an objectFifo link whose input fifo has fromStream
// dimensions on its consumer must be rejected with the diagnostic annotated
// on the link op at the end of this module.
module @ndDMAObjFifoAIE2 {
AIE.device(xcve2302) {
// Tiles used below: (1, 0) produces @of0, (1, 1) sits between the input and
// output fifos, and (2, 2) / (2, 3) consume @of1 / @of2 respectively.
%tile10 = AIE.tile(1, 0)
%tile11 = AIE.tile(1, 1)
%tile22 = AIE.tile(2, 2)
%tile23 = AIE.tile(2, 3)

// Input fifo of the link. Its consumer (%tile11) carries a fromStream
// dimension list; presumably this is what triggers the diagnostic on the
// link op below.
AIE.objectFifo @of0 (%tile10, {%tile11 fromStream [<32, 16>,
< 8, 1>]},
2 : i32) : !AIE.objectFifo<memref<256xi32>>

// First output fifo of the link: %tile11 produces with a four-entry toStream
// dimension list, single consumer %tile22.
AIE.objectFifo @of1 (%tile11 toStream [< 4,64>,
< 2, 4>,
< 8, 8>,
< 4, 1>],
{%tile22}, 2 : i32) : !AIE.objectFifo<memref<128xi32>>

// Second output fifo of the link: same toStream dimension list as @of1,
// single consumer %tile23.
AIE.objectFifo @of2 (%tile11 toStream [< 4,64>,
< 2, 4>,
< 8, 8>,
< 4, 1>],
{%tile23}, 2 : i32) : !AIE.objectFifo<memref<128xi32>>
// expected-error@+1 {{'AIE.objectFifo.link' op currently does not support objectFifos with dimensionsFromStreamPerConsumer.}}
AIE.objectFifo.link [ @of0 ] -> [ @of1, @of2 ] ()
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//===- nd_dma_distribute_broadcast_AIE2_bad.mlir ---------------*- MLIR -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2023, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//

// RUN: aie-opt --aie-objectFifo-stateful-transform --verify-diagnostics %s

// Negative test: an objectFifo link whose output fifos combine toStream
// dimensions with more than one consumer tile must be rejected with the
// diagnostic annotated on the link op at the end of this module.
module @ndDMAObjFifoAIE2 {
AIE.device(xcve2302) {
// Tiles used below: (1, 0) produces @of0, (1, 1) sits between the input and
// output fifos, and the remaining four tiles are the consumers of the two
// output fifos.
%tile10 = AIE.tile(1, 0)
%tile11 = AIE.tile(1, 1)
%tile12 = AIE.tile(1, 2)
%tile22 = AIE.tile(2, 2)
%tile13 = AIE.tile(1, 3)
%tile23 = AIE.tile(2, 3)

// Input fifo of the link; no stream dimensions are specified on it.
AIE.objectFifo @of0 (%tile10, {%tile11},
2 : i32) : !AIE.objectFifo<memref<256xi32>>

// First output fifo of the link: toStream dimensions on the producer AND two
// consumer tiles (%tile12, %tile22).
AIE.objectFifo @of1 (%tile11 toStream [< 4,64>,
< 2, 4>,
< 8, 8>,
< 4, 1>],
{%tile12, %tile22}, 2 : i32) : !AIE.objectFifo<memref<128xi32>>

// Second output fifo of the link: same toStream dimension list, two consumer
// tiles (%tile13, %tile23).
AIE.objectFifo @of2 (%tile11 toStream [< 4,64>,
< 2, 4>,
< 8, 8>,
< 4, 1>],
{%tile13, %tile23}, 2 : i32) : !AIE.objectFifo<memref<128xi32>>
// expected-error@+1 {{'AIE.objectFifo.link' op currently does not support objectFifos with dimensionsToStream and multiple consumers.}}
AIE.objectFifo.link [ @of0 ] -> [ @of1, @of2 ] ()
}
}

0 comments on commit 6b1c08d

Please sign in to comment.