From b94b170b98a43e42fbbb3e3a6180ce9192351c94 Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Mon, 21 Aug 2023 21:45:14 -0700 Subject: [PATCH] Pipeclean AIE2 peano flow (#582) Peano for AIE2 has matured quite a bit, but we haven't been actively using it for AIE2/phoenix. This patch enables some simple designs to go through the flow: 1) aiecc.py now sets the correct architecture for llc when using Peano. 2) We generate the correct target architecture in LLVMIR based on the DeviceOp in MLIR. 3) We lower lock operations to AIE2 lock intrinsics based on the DeviceOp as well. --- .../AIE/Transforms/AIECoreToStandard.cpp | 70 +++++++++++++++---- test/aiecc/simple.mlir | 8 ++- tools/aiecc/aiecc/main.py | 4 +- 3 files changed, 65 insertions(+), 17 deletions(-) diff --git a/lib/Dialect/AIE/Transforms/AIECoreToStandard.cpp b/lib/Dialect/AIE/Transforms/AIECoreToStandard.cpp index c3040a7f73..4aadfb6c2b 100644 --- a/lib/Dialect/AIE/Transforms/AIECoreToStandard.cpp +++ b/lib/Dialect/AIE/Transforms/AIECoreToStandard.cpp @@ -198,11 +198,24 @@ struct AIEUseLockToStdLowering : public OpConversionPattern { matchAndRewrite(UseLockOp useLock, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { if (!isa(useLock->getParentOp())) { - std::string funcName = "llvm.aie.lock."; + auto device = useLock->getParentOfType(); + if (!device) { + return module.emitOpError("Device Not found!"); + } + const auto &target_model = device.getTargetModel(); + + // Generate the intrinsic name + std::string funcName = ""; + if (target_model.getTargetArch() == AIEArch::AIE1) + funcName = "llvm.aie.lock."; + else + funcName = "llvm.aie2."; if (useLock.acquire() || useLock.acquire_ge()) - funcName += "acquire.reg"; + funcName += "acquire"; else if (useLock.release()) - funcName += "release.reg"; + funcName += "release"; + if (target_model.getTargetArch() == AIEArch::AIE1) + funcName += ".reg"; auto useLockFunc = module.lookupSymbol(funcName); if (!useLockFunc) @@ -362,11 +375,27 
@@ struct AIECoreToStandardPass ModuleOp m = getOperation(); OpBuilder builder = OpBuilder::atBlockEnd(m.getBody()); + if (m.getOps().empty()) { + m.emitOpError("expected AIE.device operation at toplevel"); + signalPassFailure(); + } + DeviceOp device = *(m.getOps().begin()); + const auto &target_model = device.getTargetModel(); + const char *triple; + switch (target_model.getTargetArch()) { + case AIEArch::AIE1: + triple = "aie"; + break; + case AIEArch::AIE2: + triple = "aie2"; + break; + } + // Ensure that we don't have an incorrect target triple. This may override // some bogus target triple in the original mlir. In reality this should // pick the 'aie' target triple. m->setAttr(LLVM::LLVMDialect::getTargetTripleAttrName(), - builder.getStringAttr("aie")); + builder.getStringAttr(triple)); // Extract all CoreOps // Create an LLVM func for each CoreOp @@ -379,12 +408,6 @@ struct AIECoreToStandardPass DenseMap> tileToBuffers; DenseMap switchboxes; - if (m.getOps().empty()) { - m.emitOpError("expected AIE.device operation at toplevel"); - signalPassFailure(); - } - DeviceOp device = *(m.getOps().begin()); - NetlistAnalysis NL(device, tiles, cores, mems, locks, tileToBuffers, switchboxes); NL.collectTiles(tiles); @@ -404,6 +427,9 @@ struct AIECoreToStandardPass Type int384Type = IntegerType::get(builder.getContext(), 384); Type floatType = FloatType::getF32(builder.getContext()); + // Note that not all of these are valid for a particular design, or needed. + // For right now, we will just accept the noise. 
+ // llvm.func @debug_i32(%val: !llvm.i32) -> () builder .create( @@ -498,6 +524,22 @@ struct AIECoreToStandardPass FunctionType::get(builder.getContext(), {int32Type, int32Type}, {})) .setPrivate(); + // llvm.func @llvm.aie2.acquire(%lock_id: !llvm.i32, %lock_val: + // !llvm.i32) ->() + builder + .create( + builder.getUnknownLoc(), "llvm.aie2.acquire", + FunctionType::get(builder.getContext(), {int32Type, int32Type}, {})) + .setPrivate(); + + // llvm.func @llvm.aie2.release(%lock_id: !llvm.i32, %lock_val: + // !llvm.i32) ->() + builder + .create( + builder.getUnknownLoc(), "llvm.aie2.release", + FunctionType::get(builder.getContext(), {int32Type, int32Type}, {})) + .setPrivate(); + IRMapping mapper; ConversionTarget target(getContext()); target.addLegalDialect(); @@ -515,11 +557,15 @@ struct AIECoreToStandardPass AIEEventOpToStdLowering>(m.getContext(), m); patterns.add(m.getContext(), m, mapper); - patterns.add(m.getContext(), m, mapper, - tileToBuffers, 1, tileCol, tileRow); if (failed(applyPartialConversion(m, target, std::move(patterns)))) signalPassFailure(); + RewritePatternSet outlinePatterns(&getContext()); + outlinePatterns.add( + m.getContext(), m, mapper, tileToBuffers, 1, tileCol, tileRow); + if (failed(applyPartialConversion(m, target, std::move(outlinePatterns)))) + signalPassFailure(); + // Move all the func.func ops and memref.globals from the device to the // module outlineOps(device); diff --git a/test/aiecc/simple.mlir b/test/aiecc/simple.mlir index e19aa45b1b..6cdd854d04 100644 --- a/test/aiecc/simple.mlir +++ b/test/aiecc/simple.mlir @@ -15,14 +15,16 @@ // RUN: aiecc.py --no-unified --compile --no-link --no-xchesscc -nv --sysroot=%VITIS_SYSROOT% --host-target=aarch64-linux-gnu %s -I%aie_runtime_lib% %aie_runtime_lib%/test_library.cpp %S/test.cpp -o test.elf | FileCheck %s --check-prefix=PEANO // RUN: aiecc.py --no-unified --no-compile --no-link -nv --sysroot=%VITIS_SYSROOT% --host-target=aarch64-linux-gnu %s -I%aie_runtime_lib% 
%aie_runtime_lib%/test_library.cpp %S/test.cpp -o test.elf | FileCheck %s --check-prefix=NOCOMPILE +// Note that llc determines the architecture from the llvm IR. + // XCHESSCC-NOT: {{^llc}} // XCHESSCC: xchesscc_wrapper aie // XCHESSCC-NOT: {{^llc}} -// PEANO-NOT: xchesscc_wrapper aie +// PEANO-NOT: xchesscc_wrapper // PEANO: {{^llc}} // PEANO-SAME: --march=aie -// PEANO-NOT: xchesscc_wrapper aie -// NOCOMPILE-NOT: xchesscc_wrapper aie +// PEANO-NOT: xchesscc_wrapper +// NOCOMPILE-NOT: xchesscc_wrapper // NOCOMPILE-NOT: {{^llc}} module { diff --git a/tools/aiecc/aiecc/main.py b/tools/aiecc/aiecc/main.py index a4c4095fac..a28d1d9d41 100644 --- a/tools/aiecc/aiecc/main.py +++ b/tools/aiecc/aiecc/main.py @@ -223,7 +223,7 @@ async def process_core(self, core): if(not opts.unified): file_core_llvmir_stripped = self.tmpcorefile(core, "stripped.ll") await self.do_call(task, ['opt', '--passes=default,strip', '-S', file_core_llvmir, '-o', file_core_llvmir_stripped]) - await self.do_call(task, ['llc', file_core_llvmir_stripped, '-O2', '--march=aie', '--function-sections', '--filetype=obj', '-o', file_core_obj]) + await self.do_call(task, ['llc', file_core_llvmir_stripped, '-O2', '--march=%s' % self.aie_target.lower(), '--function-sections', '--filetype=obj', '-o', file_core_obj]) else: file_core_obj = self.file_obj if(opts.link and opts.xbridge): @@ -458,7 +458,7 @@ async def run_flow(self): self.file_llvmir_opt= os.path.join(self.tmpdirname, 'input.opt.ll') await self.do_call(progress_bar.task, ['opt', '--opaque-pointers=0', '--passes=default', '-inline-threshold=10', '-S', self.file_llvmir, '-o', self.file_llvmir_opt]) - await self.do_call(progress_bar.task, ['llc', self.file_llvmir_opt, '-O2', '--march=aie', '--function-sections', '--filetype=obj', '-o', self.file_obj]) + await self.do_call(progress_bar.task, ['llc', self.file_llvmir_opt, '-O2', '--march=%s' % self.aie_target.lower(), '--function-sections', '--filetype=obj', '-o', self.file_obj]) 
progress_bar.update(progress_bar.task,advance=0,visible=False) progress_bar.task_completed = progress_bar.add_task("[green] AIE Compilation:", total=len(cores)+1, command="%d Workers" % nworkers)