Skip to content

Commit

Permalink
Merge branch 'main' into link_creation_additional_check
Browse files Browse the repository at this point in the history
  • Loading branch information
newling authored Aug 28, 2024
2 parents f1d080d + a5fbf9d commit a741c12
Show file tree
Hide file tree
Showing 9 changed files with 92 additions and 223 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ on:
- main

schedule:
# At minute 0 past every 4nd hour. (see https://crontab.guru)
# At minute 0 past every 12th hour. (see https://crontab.guru)
# this job is to keep the ccache cache warm
- cron: '0 */4 * * *'
- cron: '0 */12 * * *'

concurrency:
# A PR number if a pull request and otherwise the commit hash. This cancels
Expand Down
5 changes: 4 additions & 1 deletion compiler/plugins/target/AMD-AIE/aie/AIEDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,13 +148,16 @@ void AIEDialect::printType(Type type, DialectAsmPrinter &printer) const {

/// without this, canonicalize/cse/etc will lift eg constants out of core ops
/// causing eg lower-to-aie to fail to converge
///
/// There's no way to do this in tablegen, so unfortunately it must be hidden
/// away here
struct AIEDialectFoldInterface : DialectFoldInterface {
using DialectFoldInterface::DialectFoldInterface;

/// Registered hook to check if the given region, which is attached to an
/// operation that is *not* isolated from above, should be used when
/// materializing constants.
bool shouldMaterializeInto(Region *region) const final override {
bool shouldMaterializeInto(Region *region) const final {
// If this is an AIE::CoreOp region, then insert into it.
return isa<CoreOp>(region->getParentOp());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "Passes.h"
#include "iree-amd-aie/aie_runtime/iree_aie_runtime.h"
#include "llvm/ADT/STLExtras.h"
#include "mlir/Analysis/TopologicalSortUtils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
Expand Down Expand Up @@ -113,28 +112,11 @@ bool isJoin(ObjectFifoLinkOp op) { return op.getFifoIns().size() > 1; }
bool isDistribute(ObjectFifoLinkOp op) { return op.getFifoOuts().size() > 1; }

std::optional<Value> getOptionalSharedTile(ObjectFifoLinkOp op) {
if (isJoin(op)) {
auto fifoOut = getOutputObjectFifos(op)[0];
for (auto fifoIn : getInputObjectFifos(op))
if (fifoOut.getProducerTile() != fifoIn.getConsumerTiles()[0]) return {};
return {fifoOut.getProducerTile()};
}

if (isDistribute(op)) {
auto fifoIn = getInputObjectFifos(op)[0];
for (auto fifoOut : getOutputObjectFifos(op))
if (fifoIn.getConsumerTiles()[0] != fifoOut.getProducerTile()) return {};
return {fifoIn.getConsumerTiles()[0]};
}

auto fifoIn = getInputObjectFifos(op);
if (auto fifoOut = getOutputObjectFifos(op);
!fifoIn.empty() && !fifoOut.empty())
for (auto consumerIn : fifoIn[0].getConsumerTiles())
if (consumerIn == fifoOut[0].getProducerTile())
return {fifoOut[0].getProducerTile()};
return {};
std::vector<ObjectFifoCreateOp> fifoOuts = getOutputObjectFifos(op);
assert(fifoOuts.size() > 0);
return fifoOuts[0].getProducerTile();
}

} // namespace

class LockAnalysis {
Expand Down Expand Up @@ -168,21 +150,7 @@ class DMAChannelAnalysis {
DenseMap<Value, uint8_t> consumerChannelsPerTile;

public:
DMAChannelAnalysis(DeviceOp &device) {
// go over the channels used for each tile and update the producer/consumer
// channel maps
for (auto memOp : device.getOps<MemOp>()) {
Region &r = memOp.getBody();
auto tile = memOp.getTile();
for (auto &bl : r.getBlocks()) {
for (auto op : bl.getOps<DMAStartOp>()) {
static_cast<DMAChannelDir>(op.getChannelDir()) == DMAChannelDir::MM2S
? getProducerDMAChannel(tile)
: getConsumerDMAChannel(tile);
}
}
}
}
DMAChannelAnalysis() {}

/// Given an AIE tile, returns its next usable producer channel.
SwitchDMAConnection getProducerDMAChannel(Value tile) {
Expand Down Expand Up @@ -536,12 +504,6 @@ void replaceReleaseOp(
DenseMap<std::pair<ObjectFifoCreateOp, int>,
std::vector<ObjectFifoReleaseOp>> &releaseOps) {
ObjectFifoCreateOp op = getObjectFifo(releaseOp);
auto core = releaseOp->getParentOfType<CoreOp>();
if (auto linkOp = getOptionalLinkOp(op))
if (core.getTile() == *getOptionalSharedTile(*linkOp))
llvm::report_fatal_error(
"currently cannot access objectFifo used in "
"ObjectFifoLinkOp");

auto port = releaseOp.getPort();
std::pair<ObjectFifoCreateOp, int> opPort = {op, static_cast<int>(port)};
Expand Down Expand Up @@ -653,12 +615,7 @@ void replaceObjectAcquireOp(
const DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>> &buffersPerFifo,
DenseMap<ObjectFifoAcquireOp, std::vector<BufferOp>> &subviews) {
ObjectFifoCreateOp op = getObjectFifo(acquireOp);
auto core = acquireOp->getParentOfType<CoreOp>();
auto linkOp = getOptionalLinkOp(op);
if (linkOp && core.getTile() == *getOptionalSharedTile(*linkOp))
llvm::report_fatal_error(
"currently cannot access objectFifo used in "
"ObjectFifoLinkOp");

// index of next element to acquire for this objectFifo
// useful for keeping track of which
Expand Down Expand Up @@ -995,7 +952,7 @@ struct AMDAIEObjectFifoStatefulTransformPass : mlir::OperationPass<DeviceOp> {
void runOnOperation() override {
DeviceOp device = getOperation();
LockAnalysis lockAnalysis(device);
DMAChannelAnalysis dmaAnalysis(device);
DMAChannelAnalysis dmaAnalysis;
OpBuilder builder = OpBuilder::atBlockEnd(device.getBody());
// maps each objFifo to its corresponding buffer
DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>> buffersPerFifo;
Expand Down Expand Up @@ -1092,16 +1049,14 @@ struct AMDAIEObjectFifoStatefulTransformPass : mlir::OperationPass<DeviceOp> {
}

// Remove old ops
SetVector<Operation *> opsToErase;
IRRewriter rewriter(&getContext());
device.walk([&](Operation *op) {
if (isa<ObjectFifoCreateOp, ObjectFifoLinkOp, ObjectFifoAcquireOp,
ObjectFifoSubviewAccessOp, ObjectFifoReleaseOp>(op))
opsToErase.insert(op);
ObjectFifoSubviewAccessOp, ObjectFifoReleaseOp>(op)) {
op->dropAllUses();
rewriter.eraseOp(op);
}
});
topologicalSort(opsToErase);
IRRewriter rewriter(&getContext());
for (auto it = opsToErase.rbegin(); it != opsToErase.rend(); ++it)
(*it)->erase();
}
};

Expand Down
4 changes: 2 additions & 2 deletions compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

// RUN: iree-opt --amdaie-objectFifo-stateful-transform %s | FileCheck %s

// CHECK-LABEL: aie.device(npu1_4col) {
// CHECK-LABEL: aie.device(xcvc1902) {
// CHECK: memref.global "public" @of2_cons : memref<16xi32>
// CHECK: memref.global "public" @of2 : memref<16xi32>
// CHECK: memref.global "public" @of1_cons : memref<16xi32>
Expand Down Expand Up @@ -68,7 +68,7 @@
// CHECK: }

module @link_AIE1 {
aie.device(npu1_4col) {
aie.device(xcvc1902) {
%tile20 = aie.tile(2, 0)
%tile12 = aie.tile(1, 2)
%tile22 = aie.tile(2, 2)
Expand Down
145 changes: 0 additions & 145 deletions compiler/plugins/target/AMD-AIE/aie/test/tileDMA_test.mlir

This file was deleted.

23 changes: 21 additions & 2 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@

#include "iree-amd-aie/IR/AMDAIEAttrs.h"
#include "iree-amd-aie/IR/AMDAIEDialect.cpp.inc"
#include "iree-amd-aie/IR/AMDAIETypes.h"
#include "mlir/IR/DialectImplementation.h"
#include "iree-amd-aie/IR/AMDAIEOps.h"
#include "mlir/Interfaces/FoldInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"

namespace mlir::iree_compiler::AMDAIE {

Expand All @@ -24,11 +25,29 @@ struct AMDAIEDialectOpAsmInterface : public OpAsmDialectInterface {
}
};

/// Dialect-level fold hook for AMDAIE.
///
/// Without this, canonicalize/cse/etc. will lift e.g. constants out of core
/// ops at every opportunity, causing problems when lowering to AIE.
///
/// There's no way to do this in tablegen, so unfortunately it must be hidden
/// away here.
struct AMDAIEDialectFoldInterface : DialectFoldInterface {
  using DialectFoldInterface::DialectFoldInterface;

  /// Hook queried for a region that is attached to an operation which is
  /// *not* isolated from above. Returns true when constants should be
  /// materialized into that region.
  bool shouldMaterializeInto(Region *region) const final {
    // Only a region whose parent op is an AMDAIE::CoreOp receives constants.
    auto *owner = region->getParentOp();
    return isa<AMDAIE::CoreOp>(owner);
  }
};

/// Dialect initialization: runs the attribute, op and type initializers, then
/// attaches the dialect-wide interfaces.
void AMDAIEDialect::initialize() {
  initializeAMDAIEAttrs();
  initializeAMDAIEOps();
  initializeAMDAIETypes();
  // Interfaces are attached in the listed order: asm printing first, then the
  // fold interface that keeps constants inside core regions.
  addInterfaces<AMDAIEDialectOpAsmInterface, AMDAIEDialectFoldInterface>();
}

} // namespace mlir::iree_compiler::AMDAIE
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,34 @@ func.func @logicalobjectfifo_from_memref(%arg0: memref<1x1x8x16xi32, 1>) {
%1 = amdaie.dma_cpy_nd(%0[][][], %0[][][]) : (!amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>, !amdaie.logicalobjectfifo<memref<1x1x8x16xi32, 1>>)
return
}

// -----


// A test of AMDAIEDialectFoldInterface. Don't move ops out of cores.

// CHECK-LABEL: func @isolated_cores
// CHECK-NOT: arith.constant 3
// CHECK: amdaie.core
// CHECK: arith.constant 3
// CHECK: amdaie.core
// CHECK: arith.constant 3
// Two cores on two different tiles each define the same constant (3). The
// check lines above this function expect no such constant before the first
// core and one after each `amdaie.core`, i.e. the constants are not moved out
// of the core regions.
func.func @isolated_cores() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
// Tiles that the two cores below are placed on.
%tile_0 = amdaie.tile(%c0, %c0)
%tile_1 = amdaie.tile(%c0, %c1)
// First core: defines its own copy of the constant and uses it in a fill.
%0 = amdaie.core(%tile_0, in : [], out : []) {
%c3 = arith.constant 3 : index
%alloc = memref.alloc() : memref<2x2xindex>
linalg.fill ins(%c3 : index) outs(%alloc : memref<2x2xindex>)
amdaie.end
}
// Second core: same body as the first, on the other tile.
%1 = amdaie.core(%tile_1, in : [], out : []) {
%c3 = arith.constant 3 : index
%alloc = memref.alloc() : memref<2x2xindex>
linalg.fill ins(%c3 : index) outs(%alloc : memref<2x2xindex>)
amdaie.end
}
return
}
Loading

0 comments on commit a741c12

Please sign in to comment.