Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Zero padding on MemTiles #1874

Draft
wants to merge 44 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
7ed306d
Zero Padding python binding
pvasireddy-amd Sep 25, 2024
266b50b
Padding at python level
pvasireddy-amd Sep 25, 2024
47dfc0a
Connecting padding from fifo to dmabd
pvasireddy-amd Sep 25, 2024
221507e
Padding
pvasireddy-amd Sep 25, 2024
d7585b8
Slight adjustment
pvasireddy-amd Sep 26, 2024
ae7dc3c
Test cases
pvasireddy-amd Sep 26, 2024
c020094
Small changes
pvasireddy-amd Oct 1, 2024
31a8a3a
Merge branch 'main' of https://github.com/Xilinx/mlir-aie into zero_pad
pvasireddy-amd Oct 1, 2024
13ebe8b
Revert changes
pvasireddy-amd Oct 1, 2024
5b39c9e
Runtime zero padding on MemTile
pvasireddy-amd Oct 1, 2024
496d3a2
Padding through dma-tasks-to-npu
pvasireddy-amd Oct 9, 2024
468c195
ObjectFifo example
pvasireddy-amd Oct 9, 2024
28e5ea0
Example code and DmaTasksToNpu
pvasireddy-amd Oct 16, 2024
fced7a9
Removed comment
pvasireddy-amd Oct 16, 2024
8867b5c
Merging main
pvasireddy-amd Oct 23, 2024
613617f
clang format
pvasireddy-amd Oct 23, 2024
e05429b
Python format
pvasireddy-amd Oct 23, 2024
6e40239
Remove unnecessary changes
pvasireddy-amd Oct 23, 2024
58686f5
Clang format
pvasireddy-amd Oct 23, 2024
6302047
Missing comma
pvasireddy-amd Oct 23, 2024
30b4f9c
Python format
pvasireddy-amd Oct 23, 2024
e8b3bbb
Python format
pvasireddy-amd Oct 23, 2024
6b5e0d4
Python format
pvasireddy-amd Oct 23, 2024
79348c3
Extra case
pvasireddy-amd Oct 25, 2024
af6ab71
Merge branch 'main' into zero_pad
pvasireddy-amd Oct 25, 2024
7db1506
Run command
pvasireddy-amd Oct 25, 2024
7c0a256
Merge branch 'zero_pad' of https://github.com/Xilinx/mlir-aie into ze…
pvasireddy-amd Oct 25, 2024
7975a17
Example code
pvasireddy-amd Oct 25, 2024
5f791fd
Push the new change
pvasireddy-amd Oct 25, 2024
6e13cbb
Add zero padding to writebd in trace
pvasireddy-amd Oct 25, 2024
5855171
Update writebd in tests
pvasireddy-amd Oct 25, 2024
f6ce5c7
Test case
pvasireddy-amd Oct 25, 2024
6ac9524
Message error
pvasireddy-amd Oct 25, 2024
ac5b356
Adding D2Size parameter
pvasireddy-amd Oct 31, 2024
7b76da3
Empty padDims on MemTile MM2S channel
pvasireddy-amd Oct 31, 2024
743cbe2
clang-format
pvasireddy-amd Oct 31, 2024
5adf8bf
clang-format
pvasireddy-amd Oct 31, 2024
e75688d
Revert changes
pvasireddy-amd Oct 31, 2024
94229b9
Changes to tests with D2Size
pvasireddy-amd Oct 31, 2024
1a4ddb2
Merge branch 'main' of https://github.com/Xilinx/mlir-aie into zero_pad
pvasireddy-amd Oct 31, 2024
71722af
D2Size
pvasireddy-amd Oct 31, 2024
5e12917
Checking syntax
pvasireddy-amd Nov 1, 2024
e5dd194
Revert change
pvasireddy-amd Nov 1, 2024
e872b36
Merge branch 'main' into zero_pad
pvasireddy-amd Nov 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions include/aie/Dialect/AIE/IR/AIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1689,7 +1689,8 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol]
// via_shared_mem==1 means use consumer tile's memory module
OptionalAttr<AIEI32Attr>:$via_shared_mem,
// memtile_repeat==0 means "do it once" and don't repeat
OptionalAttr<AIEI32Attr>:$memtile_repeat
OptionalAttr<AIEI32Attr>:$memtile_repeat,
OptionalAttr<BDPadLayoutArrayAttr>:$padDimensions
);

let assemblyFormat = [{
Expand Down Expand Up @@ -1728,7 +1729,8 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol]
OpBuilder<(ins "mlir::StringAttr":$sym_name, "mlir::Value":$producerTile,
"mlir::ValueRange":$consumerTiles, "mlir::Attribute":$elemNumber, "mlir::Type":$elem_type,
CArg<"llvm::ArrayRef<AIE::BDDimLayoutAttr>", "{}">:$dimensionsToStream,
CArg<"llvm::ArrayRef<AIE::BDDimLayoutArrayAttr>", "{}">:$dimensionsFromStreamPerConsumer), [{
CArg<"llvm::ArrayRef<AIE::BDDimLayoutArrayAttr>", "{}">:$dimensionsFromStreamPerConsumer,
CArg<"llvm::ArrayRef<AIE::BDPadLayoutArrayAttr>", "{}">:$padDimensions), [{
odsState.addOperands(producerTile);
odsState.addOperands(consumerTiles);
odsState.addAttribute(getSymNameAttrName(odsState.name), sym_name);
Expand Down
17 changes: 15 additions & 2 deletions include/aie/Dialect/AIEX/IR/AIEX.td
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,13 @@ def AIE_NpuDmaMemcpyNdOp: AIEX_Op<"npu.dma_memcpy_nd", [
OptionalAttr<PacketInfoAttr>:$packet,
FlatSymbolRefAttr:$metadata,
I64Attr:$id,
DefaultValuedOptionalAttr<BoolAttr, "false">:$issue_token
DefaultValuedOptionalAttr<BoolAttr, "false">:$issue_token,
DefaultValuedOptionalAttr<I64Attr, "0">:$d0_zero_before,
DefaultValuedOptionalAttr<I64Attr, "0">:$d1_zero_before,
DefaultValuedOptionalAttr<I64Attr, "0">:$d2_zero_before,
DefaultValuedOptionalAttr<I64Attr, "0">:$d0_zero_after,
DefaultValuedOptionalAttr<I64Attr, "0">:$d1_zero_after,
DefaultValuedOptionalAttr<I64Attr, "0">:$d2_zero_after
);

let assemblyFormat = [{
Expand Down Expand Up @@ -828,6 +834,7 @@ def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
I32Attr:$d0_stride,
I32Attr:$d1_size,
I32Attr:$d1_stride,
I32Attr:$d2_size,
I32Attr:$d2_stride,
I32Attr:$iteration_current,
I32Attr:$iteration_size,
Expand All @@ -840,7 +847,13 @@ def AIE_NpuWriteBdOp: AIEX_Op<"npu.writebd", []> {
I32Attr:$lock_rel_id,
I32Attr:$lock_acq_enable,
I32Attr:$lock_acq_val,
I32Attr:$lock_acq_id
I32Attr:$lock_acq_id,
I32Attr:$d0_zero_before,
I32Attr:$d1_zero_before,
I32Attr:$d2_zero_before,
I32Attr:$d0_zero_after,
I32Attr:$d1_zero_after,
I32Attr:$d2_zero_after
);
let results = (outs );
let assemblyFormat = [{ attr-dict }];
Expand Down
46 changes: 31 additions & 15 deletions lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,14 +459,19 @@ struct AIEObjectFifoStatefulTransformPass
void createBd(OpBuilder &builder, LockOp acqLock, int acqMode,
LockAction acqLockAction, LockOp relLock, int relMode,
MyOp buff, int offset, int len, Block *succ,
BDDimLayoutArrayAttr dims) {
BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions) {
if (acqLock)
builder.create<UseLockOp>(builder.getUnknownLoc(), acqLock, acqLockAction,
acqMode);
if (!dims.getValue().empty())

if (!dims.getValue().empty() && !padDimensions.getValue().empty()) {
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims,
padDimensions);
} else if (!dims.getValue().empty()) {
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims);
else
} else {
builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len);
}
if (acqLock)
builder.create<UseLockOp>(builder.getUnknownLoc(), relLock,
LockAction::Release, relMode);
Expand All @@ -480,7 +485,8 @@ struct AIEObjectFifoStatefulTransformPass
void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode,
int acqNum, int relNum, MyOp buff, int offset, int len,
DMAChannelDir channelDir, size_t blockIndex, Block *succ,
BDDimLayoutArrayAttr dims) {
BDDimLayoutArrayAttr dims,
BDPadLayoutArrayAttr padDimensions) {
LockOp acqLock;
LockOp relLock;
int acqMode = 1;
Expand All @@ -505,20 +511,27 @@ struct AIEObjectFifoStatefulTransformPass
}
}
createBd(builder, acqLock, acqMode, acqLockAction, relLock, relMode, buff,
offset, len, succ, dims);
offset, len, succ, dims, padDimensions);
}

/// Function that either calls createAIETileDMA(), createShimDMA() or
/// createMemTileDMA() based on op tile row value.
void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op,
DMAChannelDir channelDir, int channelIndex, int lockMode,
BDDimLayoutArrayAttr dims) {
BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims) {
if (op.getProducerTileOp().isShimTile()) {
createShimDMA(device, builder, op, channelDir, channelIndex, lockMode,
dims);
} else if (op.getProducerTileOp().isMemTile()) {
} else if (op.getProducerTileOp().isMemTile() &&
channelDir == DMAChannelDir::MM2S &&
!pad_dims.getValue().empty()) {
createMemTileDMA(device, builder, op, channelDir, channelIndex, lockMode,
dims);
dims, pad_dims);
} else if (op.getProducerTileOp().isMemTile() &&
(channelDir == DMAChannelDir::S2MM ||
pad_dims.getValue().empty())) {
createMemTileDMA(device, builder, op, channelDir, channelIndex, lockMode,
dims, nullptr);
} else {
createAIETileDMA(device, builder, op, channelDir, channelIndex, lockMode,
dims);
Expand Down Expand Up @@ -602,7 +615,7 @@ struct AIEObjectFifoStatefulTransformPass
builder.setInsertionPointToStart(curr);
createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
buffersPerFifo[target][blockIndex], /*offset*/ 0,
len, channelDir, blockIndex, succ, dims);
len, channelDir, blockIndex, succ, dims, nullptr);
curr = succ;
blockIndex++;
}
Expand Down Expand Up @@ -678,7 +691,7 @@ struct AIEObjectFifoStatefulTransformPass
createBdBlock<ExternalBufferOp>(builder, op, lockMode, acqNum, relNum,
externalBuffersPerFifo[op][blockIndex],
/*offset*/ 0, len, channelDir, blockIndex,
succ, dims);
succ, dims, nullptr);
curr = succ;
blockIndex++;
}
Expand All @@ -689,7 +702,8 @@ struct AIEObjectFifoStatefulTransformPass
void createMemTileDMA(DeviceOp &device, OpBuilder &builder,
ObjectFifoCreateOp op, DMAChannelDir channelDir,
int channelIndex, int lockMode,
BDDimLayoutArrayAttr dims) {
BDDimLayoutArrayAttr dims,
BDPadLayoutArrayAttr padDimensions) {
size_t numBlocks = op.size();
if (numBlocks == 0)
return;
Expand All @@ -710,6 +724,7 @@ struct AIEObjectFifoStatefulTransformPass
dims.getValue().drop_front(1));
}
}

if (op.getMemtileRepeat().has_value())
repeatCount = op.getMemtileRepeat().value();

Expand Down Expand Up @@ -839,7 +854,8 @@ struct AIEObjectFifoStatefulTransformPass
offset = extraOffset;
createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
buffersPerFifo[target][blockIndex], offset,
lenOut, channelDir, blockIndex, succ, dims);
lenOut, channelDir, blockIndex, succ, dims,
padDimensions);
curr = succ;
blockIndex++;
}
Expand Down Expand Up @@ -1303,7 +1319,6 @@ struct AIEObjectFifoStatefulTransformPass
auto consumerWireType = WireBundle::DMA;
std::set<TileOp>
objectFifoTiles; // track cores to check for loops during unrolling

//===------------------------------------------------------------------===//
// Split objectFifos into a consumer end and producer end if needed
//===------------------------------------------------------------------===//
Expand Down Expand Up @@ -1446,7 +1461,8 @@ struct AIEObjectFifoStatefulTransformPass
DMAChannel producerChan =
dmaAnalysis.getMasterDMAChannel(producer.getProducerTile());
createDMA(device, builder, producer, producerChan.direction,
producerChan.channel, 0, producer.getDimensionsToStreamAttr());
producerChan.channel, 0, producer.getDimensionsToStreamAttr(),
producer.getPadDimensionsAttr());
// generate objectFifo allocation info
builder.setInsertionPoint(&device.getBody()->back());

Expand All @@ -1464,7 +1480,7 @@ struct AIEObjectFifoStatefulTransformPass
BDDimLayoutArrayAttr consumerDims =
consumer.getDimensionsFromStreamPerConsumer()[0];
createDMA(device, builder, consumer, consumerChan.direction,
consumerChan.channel, 1, consumerDims);
consumerChan.channel, 1, consumerDims, nullptr);
// generate objectFifo allocation info
builder.setInsertionPoint(&device.getBody()->back());

Expand Down
7 changes: 7 additions & 0 deletions lib/Dialect/AIEX/IR/AIEXDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,13 @@ LogicalResult AIEX::NpuWriteBdOp::verify() {
return emitOpError("Iteration Size exceeds the [0:63] range.");
if (getIterationStride() > 0xFFFFF)
return emitOpError("Iteration Stride exceeds the [0:1M-1] range.");
if (targetModel.isShimNOCTile(getColumn(), getRow()) && getD2Size() != 0)
return emitOpError("ShimTile only supports 2 dimensions of sizes.");
if (targetModel.isShimNOCTile(getColumn(), getRow()) &&
(getD0ZeroBefore() != 0 || getD0ZeroAfter() != 0 ||
getD1ZeroBefore() != 0 || getD1ZeroAfter() != 0 ||
getD2ZeroBefore() != 0 || getD2ZeroAfter() != 0))
return emitOpError("ShimTile doesn't support zero padding.");
return success();
}

Expand Down
2 changes: 1 addition & 1 deletion lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase<AIECtrlPacketToDmaPass> {
SmallVector<Value>{}, SmallVector<Value>{},
SmallVector<Value>{}, ArrayRef(staticOffsets),
ArrayRef(staticSizes), ArrayRef(staticStrides),
controllerIdPkt, metadata, 0, true);
controllerIdPkt, metadata, 0, true, 0, 0, 0, 0, 0, 0);

auto shimRow = builder.getI32IntegerAttr(0);
auto shimCol = builder.getI32IntegerAttr(col);
Expand Down
56 changes: 50 additions & 6 deletions lib/Dialect/AIEX/Transforms/AIEDMATasksToNPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
}

LogicalResult rewriteSingleBD(OpBuilder &builder, Block &block,
AIE::TileOp &tile) {
AIE::TileOp &tile,
AIE::DMAChannelDir channelDir) {
AIE::DMABDOp bd_op = getBdForBlock(block);
const auto &target_model = AIE::getTargetModel(bd_op);
MemRefType buffer_type = bd_op.getBuffer().getType();
Expand All @@ -237,12 +238,23 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
<< len << " bytes falls below minimum hardware transfer unit of "
<< (addr_granularity / 8) << " bytes.";
}

// Process strides/wraps
std::optional<llvm::ArrayRef<AIE::BDDimLayoutAttr>> dims =
bd_op.getDimensions();
llvm::SmallVector<int64_t, 4> sizes = llvm::SmallVector<int64_t, 4>(4, 0);
llvm::SmallVector<int64_t, 4> strides = llvm::SmallVector<int64_t, 4>(4, 0);
int64_t d2size = 0;

// Padding
std::optional<llvm::ArrayRef<AIE::BDPadLayoutAttr>> padDims =
bd_op.getPadDimensions();
llvm::SmallVector<int64_t, 4> padBefore =
llvm::SmallVector<int64_t, 4>(4, 0);
llvm::SmallVector<int64_t, 4> padAfter =
llvm::SmallVector<int64_t, 4>(4, 0);
std::fill(padBefore.begin(), padBefore.end(), 0);
std::fill(padAfter.begin(), padAfter.end(), 0);

if (dims && dims->size() > 0) {
llvm::SmallVector<int64_t, 4> input_sizes =
llvm::SmallVector<int64_t, 4>(4, 1);
Expand All @@ -260,6 +272,25 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
input_sizes[i] = (*dims)[j].getSize();
input_strides[i] = (*dims)[j].getStride();
}
d2size = (target_model.isMemTile(tile.getCol(), tile.getRow()))
? (*dims)[2].getSize()
: 0;
if (target_model.isMemTile(tile.getCol(), tile.getRow()) &&
channelDir == AIE::DMAChannelDir::MM2S) {
if (padDims && (padDims->size() > dims->size()))
return bd_op->emitOpError()
<< "Mismatch number of dimensions between padding(s)"
<< " and wrap(s) and stride(s).";
else if (padDims)
for (size_t i = 0; i < padDims->size(); i++) {
int j = padDims->size() - i - 1;
padBefore[i] = (*padDims)[j].getConstPadBefore();
padAfter[i] = (*padDims)[j].getConstPadAfter();
}
} else if (padDims) {
return bd_op->emitOpError()
<< "supports padding only for MM2S direction on MemTiles.";
}
getHardwareStridesWraps(target_model, buffer_type, input_sizes,
input_strides, sizes, strides);
if (failed(verifyStridesWraps(bd_op, buffer_type, tile.getCol(),
Expand Down Expand Up @@ -290,8 +321,16 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
"transfer length, as this is the BD repeat count.";
return failure();
}
} else {
if (padDims && target_model.isMemTile(tile.getCol(), tile.getRow()) &&
channelDir == AIE::DMAChannelDir::MM2S) {
return bd_op->emitOpError()
<< "Padding requires n-d data layouts expressed as "
<< "wrap(s) and stride(s).";
} else if (padDims) {
return bd_op->emitOpError() << "Padding is supported only on MemTiles.";
}
}

// find next BD ID, if any
uint32_t use_next_bd = 0;
uint32_t next_bd_id = 0;
Expand All @@ -306,7 +345,7 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
/* TODO: Strides/Wraps */
/*d0_size=*/sizes[0], /*d0_stride=*/strides[0],
/*d1_size=*/sizes[1], /*d1_stride=*/strides[1],
/*d2_stride=*/strides[2],
/*d2_size=*/d2size, /*d2_stride=*/strides[2],
/*iteration_current=*/0, /*iteration_size=*/sizes[3],
/*iteration_stride=*/strides[3],
/* TODO: Next BD */
Expand All @@ -316,7 +355,10 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
/*valid_bd=*/1,
/* TODO: Locks */
/*lock_rel_val=*/0, /*lock_rel_id=*/0, /*lock_acq_enable=*/0,
/*lock_acq_val=*/0, /*lock_ackq_id=*/0);
/*lock_acq_val=*/0, /*lock_ackq_id=*/0, /*d0_zero_before=*/padBefore[0],
/*d1_zero_before=*/padBefore[1], /*d2_zero_before=*/padBefore[2],
/*d0_zero_after=*/padAfter[0], /*d1_zero_after=*/padAfter[1],
/*d2_zero_after=*/padAfter[2]);

return setAddressForSingleBD(builder, bd_op, tile);
}
Expand Down Expand Up @@ -392,13 +434,15 @@ struct AIEDMATasksToNPUPass : AIEDMATasksToNPUBase<AIEDMATasksToNPUPass> {
return failure();
}

auto channelDir = op.getDirection();

// Lower all BDs
for (auto it = body.begin(); it != body.end(); ++it) {
Block &block = *it;
if (shouldSkipBlock(block)) {
continue;
}
if (failed(rewriteSingleBD(builder, block, tile))) {
if (failed(rewriteSingleBD(builder, block, tile, channelDir))) {
return failure();
}
}
Expand Down
Loading
Loading