Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pass to assign BD IDs to NPU DMA ops and refactor AMDAIEAssignBufferDescriptorIDsPass #551

Merged
merged 2 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <numeric>
#include <set>

#include "Passes.h"
#include "aie/Dialect/AIE/IR/AIEDialect.h"
#include "iree-amd-aie/aie_runtime/Utils/ChannelBdIdGenerator.h"
#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h"
#include "mlir/Pass/Pass.h"

Expand All @@ -21,32 +23,99 @@ using namespace xilinx;
using namespace xilinx::AIE;

namespace mlir::iree_compiler::AMDAIE {
struct BdIdGenerator {
BdIdGenerator(int col, int row, AMDAIEDeviceModel &deviceModel)
: col(col), row(row), isMemTile(deviceModel.isMemTile(col, row)) {}

int32_t nextBdId(int channelIndex) {
int32_t bdId = isMemTile && channelIndex & 1 ? oddBdId++ : evenBdId++;
while (bdIdAlreadyAssigned(bdId))
bdId = isMemTile && channelIndex & 1 ? oddBdId++ : evenBdId++;
assignBdId(bdId);
return bdId;
}

void assignBdId(int32_t bdId) {
assert(!alreadyAssigned.count(bdId) && "bdId has already been assigned");
alreadyAssigned.insert(bdId);
}
/// Assign BD ids to DMABDOp's in MemOps.
LogicalResult assignBdIds(DeviceOp deviceOp) {
AMDAIEDeviceModel deviceModel = mlir::iree_compiler::AMDAIE::getDeviceModel(
static_cast<AMDAIEDevice>(deviceOp.getDevice()));

ChannelBdIdGenerator shimChannelBdIdGenerator(
deviceModel.getChannelToValidBdIds(AMDAIETileType::SHIMNOC));
ChannelBdIdGenerator memTileChannelBdIdGenerator(
deviceModel.getChannelToValidBdIds(AMDAIETileType::MEMTILE));

auto memOps = llvm::to_vector_of<TileElement>(deviceOp.getOps<MemOp>());
llvm::append_range(memOps, deviceOp.getOps<MemTileDMAOp>());
llvm::append_range(memOps, deviceOp.getOps<ShimDMAOp>());
for (TileElement memOp : memOps) {
int col = memOp.getTileID().col;
int row = memOp.getTileID().row;

// BdIdGenerator gen(col, row, deviceModel);
ChannelBdIdGenerator gen = deviceModel.isMemTile(col, row)
? memTileChannelBdIdGenerator
: shimChannelBdIdGenerator;

memOp->walk<WalkOrder::PreOrder>([&](DMABDOp bd) {
if (bd.getBdId().has_value()) gen.assignBdId(bd.getBdId().value());
});

DenseMap<Block *, int> blockChannelMap;
// Associate with each block the channel index specified by the
// dma_start
for (Block &block : memOp.getOperation()->getRegion(0))
for (auto op : block.getOps<DMAStartOp>()) {
int chNum = op.getChannelIndex();
blockChannelMap[&block] = chNum;
Block *dest = op.getDest();
while (dest) {
blockChannelMap[dest] = chNum;
if (dest->hasNoSuccessors()) break;
dest = dest->getSuccessors()[0];
if (blockChannelMap.contains(dest)) dest = nullptr;
}
}

bool bdIdAlreadyAssigned(int32_t bdId) { return alreadyAssigned.count(bdId); }
for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
assert(blockChannelMap.count(&block));
DMABDOp bd = (*block.getOps<DMABDOp>().begin());
if (bd.getBdId().has_value()) {
assert(gen.isBdIdAssigned(bd.getBdId().value()) &&
"bdId assigned by user but not found during previous walk");
} else {
std::optional<uint32_t> bdId =
gen.getAndAssignBdId(blockChannelMap[&block]);
if (!bdId)
return memOp.emitOpError()
<< "could not find and assign a valid BD id";
bd.setBdId(bdId.value());
}
}
}
for (TileElement memOp : memOps) {
DenseMap<Block *, int> blockBdIdMap;
for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
DMABDOp bd = *block.getOps<DMABDOp>().begin();
assert(bd.getBdId().has_value() &&
"DMABDOp should have bd_id assigned by now");
blockBdIdMap[&block] = bd.getBdId().value();
}

int col;
int row;
int oddBdId = ODD_BD_ID_START;
int evenBdId = EVEN_BD_ID_START;
bool isMemTile;
std::set<int32_t> alreadyAssigned;
};
for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
DMABDOp bd = *block.getOps<DMABDOp>().begin();
std::optional<int> nextBdId;
if (block.getNumSuccessors()) {
assert(llvm::range_size(block.getSuccessors()) == 1 &&
"should have only one successor block");
Block *nextBlock = block.getSuccessor(0);
if (!blockBdIdMap.contains(nextBlock))
assert(nextBlock->getOperations().size() == 1 &&
// for some reason i can't stick both of ops in a single
// isa<...>
(isa<EndOp>(nextBlock->getOperations().front()) ||
isa<DMAStartOp>(nextBlock->getOperations().front())) &&
"bb that's not in blockMap can only have aie.end");
else
nextBdId = blockBdIdMap[nextBlock];
bd.setNextBdId(nextBdId);
}
}
}
return success();
}

struct AMDAIEAssignBufferDescriptorIDsPass : mlir::OperationPass<DeviceOp> {
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
Expand All @@ -70,79 +139,7 @@ struct AMDAIEAssignBufferDescriptorIDsPass : mlir::OperationPass<DeviceOp> {

void runOnOperation() override {
DeviceOp deviceOp = getOperation();
AMDAIEDeviceModel deviceModel = mlir::iree_compiler::AMDAIE::getDeviceModel(
static_cast<AMDAIEDevice>(deviceOp.getDevice()));

auto memOps = llvm::to_vector_of<TileElement>(deviceOp.getOps<MemOp>());
llvm::append_range(memOps, deviceOp.getOps<MemTileDMAOp>());
llvm::append_range(memOps, deviceOp.getOps<ShimDMAOp>());
for (TileElement memOp : memOps) {
int col = memOp.getTileID().col;
int row = memOp.getTileID().row;

BdIdGenerator gen(col, row, deviceModel);
memOp->walk<WalkOrder::PreOrder>([&](DMABDOp bd) {
if (bd.getBdId().has_value()) gen.assignBdId(bd.getBdId().value());
});

DenseMap<Block *, int> blockChannelMap;
// Associate with each block the channel index specified by the
// dma_start
for (Block &block : memOp.getOperation()->getRegion(0))
for (auto op : block.getOps<DMAStartOp>()) {
int chNum = op.getChannelIndex();
blockChannelMap[&block] = chNum;
Block *dest = op.getDest();
while (dest) {
blockChannelMap[dest] = chNum;
if (dest->hasNoSuccessors()) break;
dest = dest->getSuccessors()[0];
if (blockChannelMap.contains(dest)) dest = nullptr;
}
}

for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
assert(blockChannelMap.count(&block));
DMABDOp bd = (*block.getOps<DMABDOp>().begin());
if (bd.getBdId().has_value())
assert(gen.bdIdAlreadyAssigned(bd.getBdId().value()) &&
"bdId assigned by user but not found during previous walk");
else
bd.setBdId(gen.nextBdId(blockChannelMap[&block]));
}
}
for (TileElement memOp : memOps) {
DenseMap<Block *, int> blockBdIdMap;
for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
DMABDOp bd = *block.getOps<DMABDOp>().begin();
assert(bd.getBdId().has_value() &&
"DMABDOp should have bd_id assigned by now");
blockBdIdMap[&block] = bd.getBdId().value();
}

for (Block &block : memOp.getOperation()->getRegion(0)) {
if (block.getOps<DMABDOp>().empty()) continue;
DMABDOp bd = *block.getOps<DMABDOp>().begin();
std::optional<int> nextBdId;
if (block.getNumSuccessors()) {
assert(llvm::range_size(block.getSuccessors()) == 1 &&
"should have only one successor block");
Block *nextBlock = block.getSuccessor(0);
if (!blockBdIdMap.contains(nextBlock))
assert(nextBlock->getOperations().size() == 1 &&
// for some reason i can't stick both of ops in a single
// isa<...>
(isa<EndOp>(nextBlock->getOperations().front()) ||
isa<DMAStartOp>(nextBlock->getOperations().front())) &&
"bb that's not in blockMap can only have aie.end");
else
nextBdId = blockBdIdMap[nextBlock];
bd.setNextBdId(nextBdId);
}
}
}
if (failed(assignBdIds(deviceOp))) signalPassFailure();
}
};

Expand Down
1 change: 1 addition & 0 deletions compiler/plugins/target/AMD-AIE/aie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ iree_cc_library(
::AIEDialectIR
::AIEXDialectIR
::AIENormalizeAddressSpacesGen
iree::target::amd-aie::Utils::Utils
)

add_subdirectory(test)
29 changes: 21 additions & 8 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ void AMDAIEDialect::initializeAMDAIEOps() {
>();
}

//===----------------------------------------------------------------------===//
// AMDAIE_BdIdOp
//===----------------------------------------------------------------------===//

// Hands this op's result to `setNameFn` together with the suggested name
// "bd_id".
// NOTE(review): presumably this is the OpAsmOpInterface hook used to name the
// SSA result in printed IR - confirm against the op's TableGen definition.
void BdIdOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
  Value bdIdResult = getResult();
  setNameFn(bdIdResult, "bd_id");
}

//===----------------------------------------------------------------------===//
// AMDAIE_ControlCodeOp
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -430,14 +438,16 @@ void LogicalObjectFifoRelease::build(OpBuilder &b, mlir::OperationState &result,
// AMDAIE_NpuDmaCpyNdOp
//===----------------------------------------------------------------------===//

// Build a NpuDmaCpyNdOp with mixed static and dynamic entries.
// Build a NpuDmaCpyNdOp with mixed static and dynamic entries and target and
// source BD IDs.
void NpuDmaCpyNdOp::build(OpBuilder &b, OperationState &result, Value dma,
ArrayRef<OpFoldResult> targetOffsets,
ArrayRef<OpFoldResult> targetSizes,
ArrayRef<OpFoldResult> targetStrides,
ArrayRef<OpFoldResult> sourceOffsets,
ArrayRef<OpFoldResult> sourceSizes,
ArrayRef<OpFoldResult> sourceStrides) {
ArrayRef<OpFoldResult> sourceStrides,
mlir::Value targetBdId, mlir::Value sourceBdId) {
SmallVector<int64_t> staticTargetOffsets, staticTargetSizes,
staticTargetStrides;
SmallVector<int64_t> staticSourceOffsets, staticSourceSizes,
Expand All @@ -462,7 +472,7 @@ void NpuDmaCpyNdOp::build(OpBuilder &b, OperationState &result, Value dma,
dynamicTargetSizes, dynamicTargetStrides, staticTargetOffsets,
staticTargetSizes, staticTargetStrides, dynamicSourceOffsets,
dynamicSourceSizes, dynamicSourceStrides, staticSourceOffsets,
staticSourceSizes, staticSourceStrides);
staticSourceSizes, staticSourceStrides, targetBdId, sourceBdId);
}

// Build a NpuDmaCpyNdOp with static entries.
Expand All @@ -472,7 +482,8 @@ void NpuDmaCpyNdOp::build(OpBuilder &b, OperationState &result, Value dma,
ArrayRef<int64_t> targetStrides,
ArrayRef<int64_t> sourceOffsets,
ArrayRef<int64_t> sourceSizes,
ArrayRef<int64_t> sourceStrides) {
ArrayRef<int64_t> sourceStrides,
mlir::Value targetBdId, mlir::Value sourceBdId) {
SmallVector<OpFoldResult> targetOffsetValues = llvm::to_vector<4>(
llvm::map_range(targetOffsets, [&](int64_t v) -> OpFoldResult {
return b.getI64IntegerAttr(v);
Expand All @@ -499,14 +510,15 @@ void NpuDmaCpyNdOp::build(OpBuilder &b, OperationState &result, Value dma,
}));
build(b, result, dma, targetOffsetValues, targetSizeValues,
targetStrideValues, sourceOffsetValues, sourceSizeValues,
sourceStrideValues);
sourceStrideValues, targetBdId, sourceBdId);
}

// Build a NpuDmaCpyNdOp with dynamic entries.
void NpuDmaCpyNdOp::build(OpBuilder &b, OperationState &result, Value dma,
ValueRange targetOffsets, ValueRange targetSizes,
ValueRange targetStrides, ValueRange sourceOffsets,
ValueRange sourceSizes, ValueRange sourceStrides) {
ValueRange sourceSizes, ValueRange sourceStrides,
mlir::Value targetBdId, mlir::Value sourceBdId) {
SmallVector<OpFoldResult> targetOffsetValues =
llvm::to_vector<4>(llvm::map_range(
targetOffsets, [](Value v) -> OpFoldResult { return v; }));
Expand All @@ -525,7 +537,7 @@ void NpuDmaCpyNdOp::build(OpBuilder &b, OperationState &result, Value dma,
sourceStrides, [](Value v) -> OpFoldResult { return v; }));
build(b, result, dma, targetOffsetValues, targetSizeValues,
targetStrideValues, sourceOffsetValues, sourceSizeValues,
sourceStrideValues);
sourceStrideValues, targetBdId, sourceBdId);
}

DoublyStridedOpInterface NpuDmaCpyNdOp::createDoublyStridedOp(
Expand All @@ -544,7 +556,8 @@ DoublyStridedOpInterface NpuDmaCpyNdOp::createDoublyStridedOp(
getValueOrCreateConstantIndexOp(rewriter, loc, newTargetStrides),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceOffsets),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceSizes),
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceStrides));
getValueOrCreateConstantIndexOp(rewriter, loc, newSourceStrides),
getTargetBdId(), getSourceBdId());
return cast<DoublyStridedOpInterface>(newOp.getOperation());
}

Expand Down
Loading
Loading