Skip to content

Commit

Permalink
Move most of LLVM lowering out of aie2xclbin (nod-ai#838)
Browse files Browse the repository at this point in the history
This is a continuation of nod-ai@be42d19.
In that PR we moved vector->aievec into the main pipeline; this PR
moves aievec->llvm into the main pipeline (and a few other dialect
lowerings to llvm). Moving the lowering of func to llvm is tricky,
because of the position of the pass `AMDAIECoreToStandardFunc`, which
lowers from CoreOp to func. TBC.
  • Loading branch information
newling authored Oct 10, 2024
1 parent a533e7d commit 039ba23
Show file tree
Hide file tree
Showing 8 changed files with 162 additions and 98 deletions.
73 changes: 28 additions & 45 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/XCLBinGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
#include <sstream>

#include "AMDAIETargets.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "aie/Passes.h"
#include "air/Conversion/AIRToAIEPass.h"
#include "iree-dialects/Dialect/LinalgTransform/Passes.h"
#include "iree/compiler/Utils/ToolUtils.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
Expand All @@ -24,6 +25,15 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ToolOutputFile.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
#include "mlir/IR/AsmState.h"
Expand All @@ -33,6 +43,7 @@
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "mlir/Transforms/Passes.h"

#define DEBUG_TYPE "amdaie-xclbingen"

Expand Down Expand Up @@ -942,47 +953,20 @@ static LogicalResult generateXCLBin(
return runTool(xclbinutilBin.value().string(), flags, verbose);
}

// Workaround pass: strips the `alignment` attribute from every llvm.load
// operation whose alignment is 1 or 2 (i.e. below 4).
//
// For instance,
//
// ```
// %113 = llvm.load %112 {alignment = 2 : i64} : !llvm.ptr -> vector<32xbf16>
// ```
//
// is rewritten to
//
// ```
// %113 = llvm.load %112 : !llvm.ptr -> vector<32xbf16>
// ```
//
// Without this pass in the pipeline, an alignment error is hit later in the
// compilation. It is a stop-gap until a better solution is found; propagating
// memref.assume_alignment is one candidate. See also
// https://jira.xilinx.com/projects/AIECC/issues/AIECC-589
namespace {
struct RemoveAlignment2FromLLVMLoadPass
    : PassWrapper<RemoveAlignment2FromLLVMLoadPass, OperationPass<ModuleOp>> {
  void runOnOperation() override {
    // The typed callback restricts the walk to llvm.load operations.
    getOperation().walk([](LLVM::LoadOp load) {
      auto attr = load.getAlignmentAttr();
      // Loads without an explicit alignment are left alone.
      if (!attr) return;
      const int alignment = attr.getValue().getSExtValue();
      const bool shouldStrip = alignment == 1 || alignment == 2;
      // An empty optional erases the attribute from the op.
      if (shouldStrip) load.setAlignment(std::optional<uint64_t>());
    });
  }

  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(
      RemoveAlignment2FromLLVMLoadPass);
};

}  // namespace
/// Appends the remaining conversions to the LLVM dialect to `pm`, in order:
/// memref, func (with the bare-pointer calling convention enabled) and
/// control flow. Each conversion is followed by a canonicalizer + CSE cleanup.
void addLowerToLLVMPasses(OpPassManager &pm) {
  using namespace mlir;

  // The same two cleanup passes are scheduled after every conversion.
  const auto addCleanup = [&pm]() {
    pm.addPass(createCanonicalizerPass());
    pm.addPass(createCSEPass());
  };

  pm.addPass(createFinalizeMemRefToLLVMConversionPass());
  addCleanup();

  ConvertFuncToLLVMPassOptions funcToLLVMOptions;
  funcToLLVMOptions.useBarePtrCallConv = true;
  pm.addPass(createConvertFuncToLLVMPass(funcToLLVMOptions));
  addCleanup();

  pm.addPass(createConvertControlFlowToLLVMPass());
  addCleanup();
}

static LogicalResult generateUnifiedObject(
MLIRContext *context, AIE::DeviceOp deviceOp, const std::string &outputFile,
Expand All @@ -998,8 +982,7 @@ static LogicalResult generateUnifiedObject(
printIRModuleScope, timing);

pm.addPass(mlir::iree_compiler::AMDAIE::createAMDAIECoreToStandardPass());
mlir::iree_compiler::AMDAIE::addLowerToLLVMPasses(pm);
pm.addPass(std::make_unique<RemoveAlignment2FromLLVMLoadPass>());
addLowerToLLVMPasses(pm);

if (verbose) {
llvm::outs() << "\nRunning: ";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "iree-amd-aie/IR/AMDAIEDialect.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Dialect/SCF/Transforms/Transforms.h"
#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Pass/Pass.h"
#define DEBUG_TYPE "iree-amdaie-load-alignment-reset"

namespace mlir::iree_compiler::AMDAIE {

using namespace mlir;

namespace {

// A pass which removes the alignment attribute from llvm load operations,
// if the alignment is less than 4 (2 or 1).
//
// Example. The pass replaces:
//
// ```
// %113 = llvm.load %112 {alignment = 2 : i64}
//                       : !llvm.ptr -> vector<32xbf16>
// ```
//
// with
//
// ```
// %113 = llvm.load %112
//                       : !llvm.ptr -> vector<32xbf16>
// ```
//
// Loads with no alignment attribute, or with any other alignment value, are
// left untouched.
//
// If this pass is not included in the matmul pipeline, there is an OOM error
// later in the compilation. This is a temporary workaround while a better
// solution is found: propagation of memref.assume_alignment is one option.
// See also https://jira.xilinx.com/projects/AIECC/issues/AIECC-589

class AMDAIELoadAlignmentReset
    : public impl::AMDAIELoadAlignmentResetBase<AMDAIELoadAlignmentReset> {
  // NOTE(review): the pass only inspects LLVM::LoadOp and creates no new
  // operations; confirm whether registering AMDAIEDialect here is required.
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<AMDAIEDialect>();
  }

  void runOnOperation() override {
    // The typed callback makes the walk visit llvm.load operations only.
    getOperation()->walk([](LLVM::LoadOp loadOp) {
      auto alignmentAttr = loadOp.getAlignmentAttr();
      if (!alignmentAttr) return;
      // Compare at full 64-bit width so a large alignment value can never be
      // truncated into 1 or 2.
      const int64_t alignment = alignmentAttr.getValue().getSExtValue();
      if (alignment == 1 || alignment == 2) {
        // An empty optional removes the alignment attribute from the op.
        loadOp.setAlignment(std::optional<uint64_t>());
      }
    });
  }
};

} // namespace

/// Creates the pass that clears alignments of 1 or 2 on llvm.load operations.
std::unique_ptr<Pass> createAMDAIELoadAlignmentResetPass() {
  return std::make_unique<AMDAIELoadAlignmentReset>();
}

} // namespace mlir::iree_compiler::AMDAIE
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ iree_cc_library(
"AMDAIEInsertLoopsForVectorization.cpp"
"AMDAIELinkExecutables.cpp"
"AMDAIELocalizeLogicalObjectFifo.cpp"
"AMDAIELoadAlignmentReset.cpp"
"AMDAIELogicalObjFifoSplittingUtils.cpp"
"AMDAIELowerExecutableTarget.cpp"
"AMDAIELowerFuncArgs.cpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace mlir::iree_compiler::AMDAIE {
#define GEN_PASS_DEF_AMDAIEINSERTCORES
#define GEN_PASS_DEF_AMDAIEINSERTLOOPSFORVECTORIZATION
#define GEN_PASS_DEF_AMDAIELINKEXECUTABLES
#define GEN_PASS_DEF_AMDAIELOADALIGNMENTRESET
#define GEN_PASS_DEF_AMDAIELOCALIZELOGICALOBJECTFIFO
#define GEN_PASS_DEF_AMDAIELOWEREXECUTABLETARGET
#define GEN_PASS_DEF_AMDAIELOWERINGSTRATEGY
Expand Down
88 changes: 40 additions & 48 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,13 @@
#include "air/Transform/AIRMiscPasses.h"
#include "air/Transform/AffineLoopOptPass.h"
#include "iree-amd-aie/IR/AMDAIEAttrs.h"
#include "iree-amd-aie/Transforms/Passes.h"
#include "iree-dialects/Dialect/LinalgTransform/Passes.h"
#include "iree/compiler/Codegen/Common/Passes.h"
#include "iree/compiler/Utils/ToolUtils.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.h"
#include "mlir/Dialect/Affine/Passes.h"
Expand Down Expand Up @@ -541,6 +539,7 @@ void buildAMDAIETransformPassPipeline(
});
}


void addAMDAIEObjectFifoLoweringPasses(OpPassManager &passManager,
bool enablePacketFlow,
TilePassPipeline useTilePipeline) {
Expand Down Expand Up @@ -623,6 +622,44 @@ void addAMDAIEObjectFifoLoweringPasses(OpPassManager &passManager,
addMLIRAIELoweringPasses(passManager);
}

/// Appends to `pm` the passes that lower from the AIE dialect: two groups of
/// passes nested on xilinx::AIE::DeviceOp, the conversions scheduled between
/// them, the vector -> aievec pipeline, and finally the aievec/vector/math/
/// arith conversions to LLVM followed by the load-alignment reset workaround.
void addMLIRAIELoweringPasses(OpPassManager &pm) {
  // Canonicalizer followed by CSE; this pair brackets the LLVM conversions.
  const auto addCanonicalizeAndCSE = [&pm]() {
    pm.addPass(createCanonicalizerPass());
    pm.addPass(createCSEPass());
  };

  // First group of passes nested on each xilinx::AIE::DeviceOp.
  OpPassManager &firstDevicePM = pm.nest<xilinx::AIE::DeviceOp>();
  firstDevicePM.addPass(createCanonicalizerPass());
  firstDevicePM.addPass(createAMDAIEDmaToNpuPass());
  firstDevicePM.addPass(createAMDAIEAssignBufferDescriptorIDsPass());
  firstDevicePM.addPass(createAMDAIEAssignBufferAddressesBasicPass());
  firstDevicePM.addPass(createAMDAIEPathfinderPass());

  // Passes scheduled directly on `pm`, between the two nested groups.
  pm.addPass(createCanonicalizerPass());
  pm.addPass(createConvertLinalgToLoopsPass());
  pm.addPass(createLowerAffinePass());
  pm.addPass(createConvertSCFToCFPass());

  // Second group of passes nested on each xilinx::AIE::DeviceOp.
  OpPassManager &secondDevicePM = pm.nest<xilinx::AIE::DeviceOp>();
  secondDevicePM.addPass(createAMDAIELocalizeLocksPass());
  secondDevicePM.addPass(createAMDAIENormalizeAddressSpacesPass());
  secondDevicePM.addPass(createCanonicalizerPass());

  mlir::iree_compiler::aievec::buildConvertVectorToAIEVec(pm);

  addCanonicalizeAndCSE();
  pm.addPass(aievec::createConvertAIEVecToLLVMPass());
  pm.addPass(createConvertVectorToLLVMPass());
  pm.addPass(memref::createExpandStridedMetadataPass());
  // Lower-affine is scheduled a second time here, after the strided-metadata
  // expansion.
  pm.addPass(createLowerAffinePass());
  pm.addPass(createConvertMathToLLVMPass());
  pm.addPass(createArithToLLVMConversionPass());
  addCanonicalizeAndCSE();

  pm.addPass(createAMDAIELoadAlignmentResetPass());
  pm.addPass(createCanonicalizerPass());
}

// TODO (Erwei): The "packPeel" temporary argument should be removed once
// pack-peel and pack-pad share the same pass pipeline. See TODOs inlined below
// for details.
Expand Down Expand Up @@ -796,30 +833,7 @@ void addMLIRAIRLoweringPasses(OpPassManager &passManager, AMDAIEDevice device,
addMLIRAIELoweringPasses(passManager);
}

/// Appends to `passManager` the passes that lower from the AIE dialect: two
/// groups of passes nested on xilinx::AIE::DeviceOp, the conversions scheduled
/// between them, and finally the vector -> aievec pipeline.
void addMLIRAIELoweringPasses(OpPassManager &passManager) {
  // First group of passes nested on each xilinx::AIE::DeviceOp.
  OpPassManager &firstDevicePM = passManager.nest<xilinx::AIE::DeviceOp>();
  firstDevicePM.addPass(createCanonicalizerPass());
  firstDevicePM.addPass(createAMDAIEDmaToNpuPass());
  firstDevicePM.addPass(createAMDAIEAssignBufferDescriptorIDsPass());
  firstDevicePM.addPass(createAMDAIEAssignBufferAddressesBasicPass());
  firstDevicePM.addPass(createAMDAIEPathfinderPass());

  // Passes scheduled directly on `passManager`, between the nested groups.
  passManager.addPass(createCanonicalizerPass());
  passManager.addPass(createConvertLinalgToLoopsPass());
  passManager.addPass(createLowerAffinePass());
  passManager.addPass(createConvertSCFToCFPass());

  // Second group of passes nested on each xilinx::AIE::DeviceOp.
  OpPassManager &secondDevicePM = passManager.nest<xilinx::AIE::DeviceOp>();
  secondDevicePM.addPass(createAMDAIELocalizeLocksPass());
  secondDevicePM.addPass(createAMDAIENormalizeAddressSpacesPass());
  secondDevicePM.addPass(createCanonicalizerPass());

  mlir::iree_compiler::aievec::buildConvertVectorToAIEVec(passManager);
}

// NOTE: this runs on the top-level program module containing all hal.executable
// ops.
Expand All @@ -832,28 +846,6 @@ void buildAMDAIELinkingPassPipeline(OpPassManager &passManager) {
mlir::createCanonicalizerPass());
}

/// Appends the conversions to the LLVM dialect to `pm`: aievec, vector, math,
/// arith and memref first, then func (with the bare-pointer calling
/// convention enabled) and control flow. Canonicalizer + CSE cleanups are
/// interleaved between the stages.
void addLowerToLLVMPasses(OpPassManager &pm) {
  // Canonicalizer followed by CSE; scheduled before and after each stage.
  const auto addCleanup = [&pm]() {
    pm.addPass(createCanonicalizerPass());
    pm.addPass(createCSEPass());
  };

  addCleanup();
  pm.addPass(aievec::createConvertAIEVecToLLVMPass());
  pm.addPass(createConvertVectorToLLVMPass());
  pm.addPass(memref::createExpandStridedMetadataPass());
  pm.addPass(createLowerAffinePass());
  pm.addPass(createConvertMathToLLVMPass());
  pm.addPass(createArithToLLVMConversionPass());
  pm.addPass(createFinalizeMemRefToLLVMConversionPass());
  addCleanup();

  ConvertFuncToLLVMPassOptions funcToLLVMOptions;
  funcToLLVMOptions.useBarePtrCallConv = true;
  pm.addPass(createConvertFuncToLLVMPass(funcToLLVMOptions));
  addCleanup();

  pm.addPass(createConvertControlFlowToLLVMPass());
  addCleanup();
}

namespace {
#define GEN_PASS_REGISTRATION
#include "iree-amd-aie/Transforms/Passes.h.inc"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ void addMLIRAIRLoweringPasses(OpPassManager &passManager, AMDAIEDevice device,
/// currently the default passes used for lowering from AIE dialect.
void addMLIRAIELoweringPasses(OpPassManager &passManager);


/// Populates passes needed to lower linalg/arith/math ops to LLVM dialect via
/// the structured ops path. The pass manager `pm` here operates on the module
/// within the IREE::HAL::ExecutableOp.
Expand All @@ -39,8 +40,6 @@ void buildAMDAIETransformPassPipeline(
bool enableVectorizationPasses, const std::string &pathToUkernels,
bool enablePacketFlow);

void addLowerToLLVMPasses(OpPassManager &pm);

/// Populates passes needed to lower the IR via a Pack-Peel based approach.
void addPackPeelBasedPassPipeline(OpPassManager &oassManager,
TilingConfig &tilingConfig,
Expand Down Expand Up @@ -268,6 +267,9 @@ std::unique_ptr<Pass> createAMDAIETileAndFusePass(
/// Create pass to propagate pack/unpack ops using upstream patterns.
std::unique_ptr<Pass> createAMDAIEPropagateDataLayoutPass();

/// Create pass to reset the alignment of LLVM load operations.
std::unique_ptr<Pass> createAMDAIELoadAlignmentResetPass();

void registerAMDAIEPasses();

} // namespace mlir::iree_compiler::AMDAIE
Expand Down
11 changes: 11 additions & 0 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,17 @@ def AMDAIELinkExecutables :
let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIELinkExecutablesPass()";
}

// Pass definition for the load-alignment workaround. The empty operation name
// (second `Pass` argument) leaves the pass unanchored to a specific op type.
def AMDAIELoadAlignmentReset :
    Pass<"iree-amdaie-load-alignment-reset", ""> {
  let summary = "Reset the alignment of the LLVM load operations.";
  let constructor = "mlir::iree_compiler::AMDAIE::createAMDAIELoadAlignmentResetPass()";
  let description = [{
    Remove the alignment attribute from LLVM load operations whose alignment
    is 1 or 2, leaving it as the 'unset' optional value. Loads with any other
    alignment, or with no alignment attribute, are left unchanged. This is a
    workaround for an issue in peano, and should eventually be removed.
  }];
}

def AMDAIELocalizeLogicalObjectfifo :
Pass<"iree-amdaie-localize-logicalobjectfifo", "ModuleOp"> {
let summary = "Localize logical objectfifos to local parallel loop scopes.";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//
// 3) Not specifying the flag at all, which should use the default value (1).


// 1) Explicitly disabled:
// RUN: iree-compile --iree-hal-target-backends=amd-aie \
// RUN: --compile-to=executable-targets --iree-amdaie-enable-vectorization-passes=0 %s | FileCheck %s -check-prefix=CHECK-DISABLED
Expand All @@ -23,6 +24,7 @@
// RUN: --compile-to=executable-targets %s | FileCheck %s -check-prefix=CHECK-DEFAULT



func.func @mm_in_bf16_out_f32(%lhs: tensor<64x64xbf16>,
%rhs: tensor<64x64xbf16>) -> tensor<64x64xf32> {
%empty = tensor.empty() : tensor<64x64xf32>
Expand All @@ -34,6 +36,6 @@ func.func @mm_in_bf16_out_f32(%lhs: tensor<64x64xbf16>,
return %res : tensor<64x64xf32>
}

// CHECK-DISABLED-NOT: aievec.matmul
// CHECK-ENABLED: aievec.matmul
// CHECK-DEFAULT: aievec.matmul
// CHECK-DISABLED-NOT: xllvm.intr.aie2.bf.mac16.conf
// CHECK-ENABLED: xllvm.intr.aie2.bf.mac16.conf
// CHECK-DEFAULT: xllvm.intr.aie2.bf.mac16.conf

0 comments on commit 039ba23

Please sign in to comment.