Skip to content

Commit

Permalink
[AMD-AIE] Add support for C++ Pack pipeline (nod-ai#113)
Browse files Browse the repository at this point in the history
This commit adds the C++ pass pipeline to lower linalg operations using
a pack-based approach.
We now need to support three separate efforts:
1) Pad-based lowering
2) Simple Pack-based lowering (now set as default)
3) Pack-based lowering using more advanced optimizations like peeling
and double buffering.

This commit therefore adds an enum to switch between the three pipelines.
The flag `iree-amdaie-use-pipeline` can be used to pick between the three.

Also removed is the use of the CPU pipeline enums from core as a proxy.
Unfortunately, the way the pass-pipeline enums are set up in core does not
make them extensible from a plugin. This needs to be addressed, but for now
the "None" value is used, and a pack-based approach is set as the default
instead. This could be made more robust, but will require upstream changes.

---------

Signed-off-by: Abhishek Varma <[email protected]>
Co-authored-by: yzhang93 <[email protected]>
Co-authored-by: MaheshRavishankar <[email protected]>
  • Loading branch information
3 people authored Jan 31, 2024
1 parent 656e7ca commit 89361be
Show file tree
Hide file tree
Showing 17 changed files with 257 additions and 122 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "air/Dialect/AIR/AIRDialect.h"
#include "air/Dialect/AIRRt/AIRRtDialect.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree-dialects/Dialect/LinalgExt/IR/LinalgExtDialect.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
#include "iree/compiler/Utils/FlatbufferUtils.h"
#include "llvm/Bitcode/BitcodeWriter.h"
Expand Down Expand Up @@ -73,6 +74,7 @@ class AIETargetBackend final : public IREE::HAL::TargetBackend {

void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<mlir::iree_compiler::IREE::Codegen::IREECodegenDialect,
IREE::LinalgExt::IREELinalgExtDialect,
transform::TransformDialect, xilinx::AIE::AIEDialect,
xilinx::AIEX::AIEXDialect, xilinx::air::airDialect,
xilinx::airrt::AIRRtDialect>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ namespace mlir::iree_compiler::AMDAIE {

namespace {
/// Add the lowering strategy configurations to be used for ops.
class AMDAIEAddLoweringStrategyPass
: public impl::AMDAIEAddLoweringStrategyBase<
AMDAIEAddLoweringStrategyPass> {
class AMDAIELoweringStrategyPass
: public impl::AMDAIELoweringStrategyBase<AMDAIELoweringStrategyPass> {
public:
void getDependentDialects(DialectRegistry &registry) const override {
registry.insert<
Expand All @@ -31,31 +30,31 @@ class AMDAIEAddLoweringStrategyPass
vector::VectorDialect>();
}

AMDAIEAddLoweringStrategyPass() = default;
AMDAIEAddLoweringStrategyPass(const AMDAIEAddLoweringStrategyOptions &options)
: AMDAIEAddLoweringStrategyBase(options) {}
AMDAIEAddLoweringStrategyPass(const AMDAIEAddLoweringStrategyPass &pass){};
AMDAIELoweringStrategyPass() = default;
AMDAIELoweringStrategyPass(const AMDAIELoweringStrategyOptions &options)
: AMDAIELoweringStrategyBase(options) {}
AMDAIELoweringStrategyPass(const AMDAIELoweringStrategyPass &pass){};

void runOnOperation() override;
};
} // namespace

void AMDAIEAddLoweringStrategyPass::runOnOperation() {
void AMDAIELoweringStrategyPass::runOnOperation() {
IREE::HAL::ExecutableVariantOp variantOp = getOperation();
ModuleOp moduleOp = variantOp.getInnerModule();
if (!moduleOp) {
getOperation()->emitError(
"Expected a variantOp root with an inner ModuleOp");
return signalPassFailure();
}
if (failed(initAIELaunchConfig(moduleOp, useUKernelStrategy))) {
if (failed(initAIELaunchConfig(moduleOp, usePassPipeline))) {
return signalPassFailure();
}
}

std::unique_ptr<Pass> createAMDAIEAddLoweringStrategyPass(
AMDAIEAddLoweringStrategyOptions options) {
return std::make_unique<AMDAIEAddLoweringStrategyPass>(options);
std::unique_ptr<Pass> createAMDAIELoweringStrategyPass(
AMDAIELoweringStrategyOptions options) {
return std::make_unique<AMDAIELoweringStrategyPass>(options);
}

} // namespace mlir::iree_compiler::AMDAIE
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ using mlir::iree_compiler::IREE::Codegen::LoweringConfigAttr;
namespace mlir::iree_compiler::AMDAIE {

namespace {

/// Lowers an hal.executable.variant operation to scalar/native-vector
/// code. Invokes different compilation pipeline to
/// - first lower to scalar/native-vector code
Expand All @@ -56,6 +57,9 @@ class AMDAIELowerExecutableTargetPass
AMDAIELowerExecutableTargetPass() = default;
AMDAIELowerExecutableTargetPass(
const AMDAIELowerExecutableTargetPass &pass){};
AMDAIELowerExecutableTargetPass(
const AMDAIELowerExecutableTargetOptions &options)
: AMDAIELowerExecutableTargetBase(options) {}

void runOnOperation() override;
};
Expand Down Expand Up @@ -138,12 +142,17 @@ void AMDAIELowerExecutableTargetPass::runOnOperation() {
case IREE::Codegen::DispatchLoweringPassPipeline::TransformDialectCodegen:
addTransformDialectPasses(executableLoweringPipeline);
break;
// TODO(avarma): Currently we are using "CPUDefault" but resorting to use
// the default case. Will soon have corresponding AIE enum.
default:
case IREE::Codegen::DispatchLoweringPassPipeline::None: {
TilingConfig tilingConfig = getTilingConfigForPipeline(moduleOp);
addPadBasedPassPipeline(executableLoweringPipeline, tilingConfig);
break;
if (usePassPipeline == AIEPassPipeline::SimplePackPipeline) {
addPackBasedPassPipeline(executableLoweringPipeline, tilingConfig);
} else if (usePassPipeline == AIEPassPipeline::PadPipeline) {
addPadBasedPassPipeline(executableLoweringPipeline, tilingConfig);
}
} break;
default:
variantOp.emitOpError("unhandled pass pipeline value set");
return signalPassFailure();
}
}

Expand All @@ -152,8 +161,9 @@ void AMDAIELowerExecutableTargetPass::runOnOperation() {
}
}

std::unique_ptr<Pass> createAMDAIELowerExecutableTargetPass() {
return std::make_unique<AMDAIELowerExecutableTargetPass>();
std::unique_ptr<Pass> createAMDAIELowerExecutableTargetPass(
AMDAIELowerExecutableTargetOptions options) {
return std::make_unique<AMDAIELowerExecutableTargetPass>(options);
}

} // namespace mlir::iree_compiler::AMDAIE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "iree-amd-aie/Transforms/Passes.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Pass/Pass.h"

Expand All @@ -30,19 +31,19 @@ struct PackConfig {
static FailureOr<PackConfig> getPackConfig(RewriterBase &rewriter,
int packLevel) {
PackConfig config;
if (packLevel == 1) {
if (packLevel == 0) {
// packed size for [M, N, K]
config.packedSizes = {rewriter.getI64IntegerAttr(16),
rewriter.getI64IntegerAttr(64),
rewriter.getI64IntegerAttr(64)};
config.packedSizes = {rewriter.getI64IntegerAttr(8),
rewriter.getI64IntegerAttr(16),
rewriter.getI64IntegerAttr(16)};
// Transpose B matrix from [K N n k] to [K N k n]
config.transposePackIndices = {1};
// There is no corresponding unpack for the specified pack operation
// 0 is used when unpack is empty
config.unpackEmpty = {0};
config.innerPerm = {{1, 0}};
config.outerPerm = {{0, 1}};
} else if (packLevel == 2) {
} else if (packLevel == 1) {
// packed size for [M, N, K, m, n, k]
config.packedSizes = {
rewriter.getI64IntegerAttr(0), rewriter.getI64IntegerAttr(0),
Expand Down Expand Up @@ -163,6 +164,12 @@ void AMDAIEPackAndTransposePass::runOnOperation() {
// Update packed linalg op
packedOp = packTransResult->transposedLinalgOp;
}

// Get the lowering config from the previous linalgOp and add it to the
// packedOp
if (auto config = getLoweringConfig(linalgOp)) {
setLoweringConfig(packedOp, config);
}
}

} // namespace
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ static bool isTilingReductionDimension(TilingInterface consumerOp,
return false;
}

/// Returns true when `op` is one of the ops that must not be selected as the
/// consumer to tile: currently linalg.copy and tensor.unpack.
static bool consumerToSkip(TilingInterface op) {
  return isa<linalg::CopyOp, tensor::UnPackOp>(op);
}

LogicalResult applyTileAndFuse(RewriterBase &rewriter, TilingInterface rootOp,
DominanceInfo &dominanceInfo,
scf::SCFTileAndFuseOptions &tileAndFuseOptions) {
Expand Down Expand Up @@ -106,9 +111,9 @@ void AMDAIETileAndFusePass::runOnOperation() {

TilingInterface consumerOp;
funcOp->walk<WalkOrder::PostOrder, ReverseIterator>([&](TilingInterface op) {
// Find the next consumer op if it does not have loops OR if it is a
// linalg.copy op.
if (op.getLoopIteratorTypes().empty() || isa<linalg::CopyOp>(op))
// Find the next consumer op if it does not have loops OR it is from
// the skip ops list which currently contains linalg.copy and tensor.unpack.
if (op.getLoopIteratorTypes().empty() || consumerToSkip(op))
return WalkResult::advance();
consumerOp = op;
return WalkResult::interrupt();
Expand Down Expand Up @@ -146,7 +151,7 @@ void AMDAIETileAndFusePass::runOnOperation() {
getAsIndexOpFoldResult(context, tileSizesVal);
auto options = scf::SCFTilingOptions().setTileSizes(tileSizes);
// When tiling using scf.for we do not need to set any mapping.
if (tilingLevel != 2) {
if (!useSCFFor) {
options.setMapping(
{gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimY),
gpu::GPUBlockMappingAttr::get(context, gpu::MappingId::DimX)});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace mlir::iree_compiler::AMDAIE {
/// implements the contraction operation interface.
static LogicalResult setRootConfig(func::FuncOp entryPointFn,
linalg::MatmulOp matmulOp,
bool useUKernelStrategy) {
AIEPassPipeline usePassPipeline) {
assert(!getLoweringConfig(matmulOp) && "expected lowering_config is not set");
auto linalgOp = cast<linalg::LinalgOp>(matmulOp.getOperation());
unsigned numLoops = linalgOp.getNumLoops();
Expand All @@ -35,34 +35,47 @@ static LogicalResult setRootConfig(func::FuncOp entryPointFn,
// TODO (nmeshram) : This needs to be moved in a separate more generalized
// logic. Also, need a flag to experiment between pad based and pack based
// approach which will have different tile sizes and pass pipelines
TileSizesListType tileSizes;
if (useUKernelStrategy) {
SmallVector<int64_t> TileSizeLevel0 = {16, 64};
SmallVector<int64_t> TileSizeLevel1 = {0, 0, 64};
SmallVector<int64_t> TileSizeLevel2 = {1, 1};
tileSizes = {TileSizeLevel0, TileSizeLevel1, TileSizeLevel2};
} else {
if (usePassPipeline == AIEPassPipeline::PadPipeline) {
SmallVector<int64_t> TileSizeLevel0 = {8, 8};
SmallVector<int64_t> TileSizeLevel1 = {4, 4};
SmallVector<int64_t> TileSizeLevel2 = {0, 0, 4};
tileSizes = {TileSizeLevel0, TileSizeLevel1, TileSizeLevel2};
TileSizesListType tileSizes = {TileSizeLevel0, TileSizeLevel1,
TileSizeLevel2};
return setOpConfigAndEntryPointFnTranslation(
entryPointFn, matmulOp, tileSizes,
IREE::Codegen::DispatchLoweringPassPipeline::None);
} else if (usePassPipeline == AIEPassPipeline::SimplePackPipeline) {
SmallVector<int64_t> TileSizeLevel0 = {8, 16};
SmallVector<int64_t> TileSizeLevel1 = {1, 1};
SmallVector<int64_t> TileSizeLevel2 = {0, 0, 1};
TileSizesListType tileSizes = {TileSizeLevel0, TileSizeLevel1,
TileSizeLevel2};
return setOpConfigAndEntryPointFnTranslation(
entryPointFn, matmulOp, tileSizes,
IREE::Codegen::DispatchLoweringPassPipeline::None);
} else if (usePassPipeline == AIEPassPipeline::PackPipeline) {
SmallVector<int64_t> TileSizeLevel0 = {16, 64};
SmallVector<int64_t> TileSizeLevel1 = {0, 0, 64};
SmallVector<int64_t> TileSizeLevel2 = {1, 1};
TileSizesListType tileSizes = {TileSizeLevel0, TileSizeLevel1,
TileSizeLevel2};
return setOpConfigAndEntryPointFnTranslation(
entryPointFn, matmulOp, tileSizes,
IREE::Codegen::DispatchLoweringPassPipeline::None);
}

return setOpConfigAndEntryPointFnTranslation(
entryPointFn, matmulOp, tileSizes,
IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault);
return matmulOp.emitOpError("unhandled pass pipeline");
}

/// Redirects to methods that set the configuration based on operation type.
static LogicalResult setRootConfigImpl(func::FuncOp entryPointFn, Operation *op,
bool useUKernelStrategy) {
AIEPassPipeline usePassPipeline) {
auto setRootConfigFn = [&](Operation *op) -> LogicalResult {
return TypeSwitch<Operation *, LogicalResult>(op)
// TODO (nmeshram): This is very limited for now; the plan is to
// let it first crash for all the other ops and then consciously
// add support for them, so that we can verify our work.
.Case<linalg::MatmulOp>([&](auto op) {
return setRootConfig(entryPointFn, op, useUKernelStrategy);
return setRootConfig(entryPointFn, op, usePassPipeline);
})
.Default([&](Operation *op) { return success(); });
};
Expand All @@ -72,7 +85,7 @@ static LogicalResult setRootConfigImpl(func::FuncOp entryPointFn, Operation *op,
/// Sets the translation information to use for a dispatch region.
static LogicalResult setTranslationInfoAndRootConfig(
func::FuncOp entryPointFn, ArrayRef<Operation *> computeOps,
bool useUKernelStrategy) {
AIEPassPipeline usePassPipeline) {
// Make sure that lowering_config is not preset on any compute ops.
for (auto computeOp : computeOps) {
if (getLoweringConfig(computeOp)) return failure();
Expand All @@ -87,8 +100,7 @@ static LogicalResult setTranslationInfoAndRootConfig(
return entryPointFn.emitError("Case with no root ops not yet supported.");
}

if (failed(
setRootConfigImpl(entryPointFn, rootOperation, useUKernelStrategy))) {
if (failed(setRootConfigImpl(entryPointFn, rootOperation, usePassPipeline))) {
return failure();
}

Expand All @@ -98,7 +110,8 @@ static LogicalResult setTranslationInfoAndRootConfig(
return success();
}

LogicalResult initAIELaunchConfig(ModuleOp moduleOp, bool useUKernelStrategy) {
LogicalResult initAIELaunchConfig(ModuleOp moduleOp,
AIEPassPipeline usePassPipeline) {
llvm::StringMap<IREE::HAL::ExecutableExportOp> exportOps =
getAllEntryPoints(moduleOp);
for (auto funcOp : moduleOp.getOps<func::FuncOp>()) {
Expand All @@ -113,7 +126,7 @@ LogicalResult initAIELaunchConfig(ModuleOp moduleOp, bool useUKernelStrategy) {

SmallVector<Operation *> computeOps = getComputeOps(funcOp);
if (failed(setTranslationInfoAndRootConfig(funcOp, computeOps,
useUKernelStrategy))) {
usePassPipeline))) {
return failure();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,18 @@

namespace mlir::iree_compiler::AMDAIE {

/// Selects which AMD-AIE lowering pass pipeline to use.
///
/// The pass-pipeline enums in IREE are implemented using tablegen and are
/// not extensible, so this separate enum is used to pick between the
/// AIE-specific pass pipelines.
enum class AIEPassPipeline : int32_t {
// Pad-based lowering.
PadPipeline = 0,
// Pack-based lowering. NOTE(review): presumably the variant meant for more
// advanced optimizations (peeling, double buffering) — confirm.
PackPipeline = 1,
// Simple pack-based lowering.
SimplePackPipeline = 2,
// NOTE(review): exact meaning is not visible here; presumably "no pipeline
// selected" — confirm against the flag definition.
None = 3
};

LogicalResult initAIELaunchConfig(ModuleOp moduleOp,
bool useUKernelStrategy = false);
AIEPassPipeline usePassPipeline);

} // namespace mlir::iree_compiler::AMDAIE

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Copyright 2023 The IREE Authors
// Copyright 2023 The IREE Authors

//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
Expand All @@ -7,6 +8,7 @@
#ifndef IREE_AMD_AIE_TRANSFORMS_PASSDETAIL_H_
#define IREE_AMD_AIE_TRANSFORMS_PASSDETAIL_H_

#include "iree-amd-aie/Transforms/KernelDispatch.h"
#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
Expand All @@ -22,7 +24,7 @@ namespace mlir::iree_compiler::AMDAIE {
#define GEN_PASS_DEF_AMDAIECLEANUP
#define GEN_PASS_DEF_AMDAIEFUSEFILLINTOFORALL
#define GEN_PASS_DEF_AMDAIELOWEREXECUTABLETARGET
#define GEN_PASS_DEF_AMDAIEADDLOWERINGSTRATEGY
#define GEN_PASS_DEF_AMDAIELOWERINGSTRATEGY
#define GEN_PASS_DEF_AMDAIELOWERWORKGROUPCOUNT
#define GEN_PASS_DEF_AMDAIEPACKANDTRANSPOSE
#define GEN_PASS_DEF_AMDAIEPAD
Expand Down
Loading

0 comments on commit 89361be

Please sign in to comment.