Skip to content

Commit

Permalink
[Ctrl Pkt Reconfig E2E] New -aie-generate-ctrlpkt compilation mode i…
Browse files Browse the repository at this point in the history
…n `aiecc.py` (#1756)
  • Loading branch information
erwei-xilinx authored Sep 9, 2024
1 parent 5cb195a commit 00bd68c
Show file tree
Hide file tree
Showing 16 changed files with 468 additions and 1,008 deletions.
6 changes: 5 additions & 1 deletion include/aie-c/Translation.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ MLIR_CAPI_EXPORTED MlirStringRef aieTranslateAIEVecToCpp(MlirOperation op,
bool aie2);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateModuleToLLVMIR(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToNPU(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToControlPackets(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef
AIETranslateControlPacketsToUI32Vec(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToXAIEV2(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToHSA(MlirOperation op);
MLIR_CAPI_EXPORTED MlirStringRef aieTranslateToBCF(MlirOperation op, int col,
Expand All @@ -34,6 +35,9 @@ aieTranslateToCDODirect(MlirOperation moduleOp, MlirStringRef workDirPath,
MLIR_CAPI_EXPORTED MlirLogicalResult aieTranslateToTxn(
MlirOperation moduleOp, MlirStringRef outputFile, MlirStringRef workDirPath,
bool aieSim, bool xaieDebug, bool enableCores);
MLIR_CAPI_EXPORTED MlirLogicalResult aieTranslateToCtrlpkt(
MlirOperation moduleOp, MlirStringRef outputFile, MlirStringRef workDirPath,
bool aieSim, bool xaieDebug, bool enableCores);
MLIR_CAPI_EXPORTED MlirOperation aieTranslateBinaryToTxn(MlirContext ctx,
MlirStringRef binary);

Expand Down
18 changes: 14 additions & 4 deletions include/aie/Targets/AIETargets.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,11 @@ mlir::LogicalResult AIETranslateGraphXPE(mlir::ModuleOp module,
mlir::LogicalResult AIETranslateToNPU(mlir::ModuleOp module,
llvm::raw_ostream &output);
mlir::LogicalResult AIETranslateToNPU(mlir::ModuleOp, std::vector<uint32_t> &);
mlir::LogicalResult AIETranslateToControlPackets(mlir::ModuleOp module,
llvm::raw_ostream &output);
mlir::LogicalResult AIETranslateToControlPackets(mlir::ModuleOp,
std::vector<uint32_t> &);
mlir::LogicalResult
AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp module,
llvm::raw_ostream &output);
mlir::LogicalResult
AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp, std::vector<uint32_t> &);
mlir::LogicalResult AIETranslateToLdScript(mlir::ModuleOp module,
llvm::raw_ostream &output,
int tileCol, int tileRow);
Expand All @@ -63,6 +64,11 @@ AIETranslateToTxn(mlir::ModuleOp m, llvm::raw_ostream &output,
llvm::StringRef workDirPath, bool outputBinary = false,
bool aieSim = false, bool xaieDebug = false,
bool enableCores = true);
mlir::LogicalResult
AIETranslateToControlPackets(mlir::ModuleOp m, llvm::raw_ostream &output,
llvm::StringRef workDirPath,
bool outputBinary = false, bool aieSim = false,
bool xaieDebug = false, bool enableCores = true);

#ifdef AIE_ENABLE_AIRBIN
mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module,
Expand All @@ -77,6 +83,10 @@ mlir::LogicalResult AIETranslateToTargetArch(mlir::ModuleOp module,
std::optional<mlir::ModuleOp>
AIETranslateBinaryToTxn(mlir::MLIRContext *ctx, std::vector<uint8_t> &binary);

std::optional<mlir::ModuleOp>
AIETranslateBinaryToCtrlpkt(mlir::MLIRContext *ctx,
std::vector<uint8_t> &binary);

} // namespace AIE

namespace aievec {
Expand Down
37 changes: 35 additions & 2 deletions lib/CAPI/Translation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,39 @@ MlirLogicalResult aieTranslateToTxn(MlirOperation moduleOp,
return wrap(status);
}

/// C API entry point: translate `moduleOp` to control packets in MLIR form
/// and write the result to `outputFile`.
///
/// The output file is opened first; if that fails the error message is
/// written to llvm::errs() and failure is returned without running the
/// translation. The translation always runs with `outputBinary` set to false.
/// `keep()` is called on the output file only when the translation succeeds.
///
/// Diagnostics emitted during the translation are not intercepted here; they
/// are left to whatever handler the caller has installed on the module's
/// context. A previous version registered a ScopedDiagnosticHandler *after*
/// the translation call, which could never capture anything; that dead code
/// has been removed rather than moved, because a handler installed before the
/// call would intercept diagnostics ahead of handlers installed by the caller.
///
/// \param moduleOp    operation to translate; must be a ModuleOp.
/// \param outputFile  path of the file to write.
/// \param workDirPath working directory forwarded to the translation.
/// \param aieSim      forwarded to the translation.
/// \param xaieDebug   forwarded to the translation.
/// \param enableCores forwarded to the translation.
/// \returns the wrapped status of the translation, or failure if the output
///          file cannot be opened.
MlirLogicalResult aieTranslateToCtrlpkt(MlirOperation moduleOp,
                                        MlirStringRef outputFile,
                                        MlirStringRef workDirPath, bool aieSim,
                                        bool xaieDebug, bool enableCores) {
  ModuleOp mod = llvm::cast<ModuleOp>(unwrap(moduleOp));

  std::string errorMessage;
  auto output = openOutputFile(StringRef(outputFile.data, outputFile.length),
                               &errorMessage);
  if (!output) {
    llvm::errs() << errorMessage << "\n";
    return wrap(failure());
  }

  // This entry point only produces the textual (MLIR) form.
  const bool outputBinary = false;
  auto status = AIETranslateToControlPackets(
      mod, output->os(), llvm::StringRef(workDirPath.data, workDirPath.length),
      outputBinary, aieSim, xaieDebug, enableCores);

  // Only mark the file to be kept when the translation produced valid output.
  if (!failed(status))
    output->keep();
  return wrap(status);
}

MlirOperation aieTranslateBinaryToTxn(MlirContext ctx, MlirStringRef binary) {
std::vector<uint8_t> binaryData(binary.data, binary.data + binary.length);
auto mod = AIETranslateBinaryToTxn(unwrap(ctx), binaryData);
Expand All @@ -133,11 +166,11 @@ MlirStringRef aieTranslateToNPU(MlirOperation moduleOp) {
return mlirStringRefCreate(cStr, npu.size());
}

MlirStringRef aieTranslateToControlPackets(MlirOperation moduleOp) {
MlirStringRef AIETranslateControlPacketsToUI32Vec(MlirOperation moduleOp) {
std::string npu;
llvm::raw_string_ostream os(npu);
ModuleOp mod = llvm::cast<ModuleOp>(unwrap(moduleOp));
if (failed(AIETranslateToControlPackets(mod, os)))
if (failed(AIETranslateControlPacketsToUI32Vec(mod, os)))
return mlirStringRefCreate(nullptr, 0);
char *cStr = static_cast<char *>(malloc(npu.size()));
npu.copy(cStr, npu.size());
Expand Down
130 changes: 129 additions & 1 deletion lib/Targets/AIETargetCDODirect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,115 @@ xilinx::AIE::AIETranslateBinaryToTxn(mlir::MLIRContext *ctx,
return module;
}

/// Convert a transaction binary into a module of control-packet ops.
///
/// The binary is decoded with parseTransactionBinary. A new module is built
/// containing one aie.device (selected from the decoded column count), one
/// memref.global per block write holding that write's payload, and one
/// runtime sequence in which:
///   - every write and mask write becomes a single control-packet op carrying
///     the command's value, and
///   - every block write is split into control-packet ops of at most four
///     32-bit words each, at consecutive addresses.
///
/// \param ctx    context in which the new module is created.
/// \param binary transaction binary to decode.
/// \returns the new module, or std::nullopt if the binary cannot be parsed,
///          reports a column count that has no matching device, contains an
///          unhandled opcode, or has block-write data that is not a dense
///          integer array. The partially built module is erased on failure.
std::optional<mlir::ModuleOp>
xilinx::AIE::AIETranslateBinaryToCtrlpkt(mlir::MLIRContext *ctx,
                                         std::vector<uint8_t> &binary) {

  // parse the binary
  std::vector<TransactionBinaryOperation> operations;
  auto c = parseTransactionBinary(binary, operations);
  if (!c) {
    llvm::errs() << "Failed to parse binary\n";
    return std::nullopt;
  }
  int columns = *c;

  // The column count comes from the binary and is used as an index into the
  // device table, so reject values without a matching entry before any IR is
  // created.
  const std::vector<AIEDevice> devices{AIEDevice::npu1_1col,
                                       AIEDevice::npu1_2col,
                                       AIEDevice::npu1_3col,
                                       AIEDevice::npu1_4col, AIEDevice::npu1};
  if (columns < 1 || static_cast<size_t>(columns) > devices.size()) {
    llvm::errs() << "Unsupported number of columns in binary: " << columns
                 << "\n";
    return std::nullopt;
  }

  auto loc = mlir::UnknownLoc::get(ctx);

  // create a new ModuleOp and set the insertion point
  auto module = ModuleOp::create(loc);
  OpBuilder builder(module.getBodyRegion());
  builder.setInsertionPointToStart(module.getBody());

  // create aie.device
  auto device = builder.create<DeviceOp>(loc, devices[columns - 1]);
  device.getRegion().emplaceBlock();
  builder.setInsertionPointToStart(device.getBody());

  // for each blockwrite in the binary, create a GlobalOp with the data.
  // Non-blockwrite operations get a null placeholder so that global_data stays
  // index-aligned with operations.
  std::vector<memref::GlobalOp> global_data;
  for (auto &op : operations) {
    if (op.cmd.Opcode != XAIE_IO_BLOCKWRITE) {
      global_data.push_back(nullptr);
      continue;
    }
    uint32_t size = op.cmd.Size / 4;
    const uint32_t *d = reinterpret_cast<const uint32_t *>(op.cmd.DataPtr);
    std::vector<uint32_t> data32(d, d + size);

    // pick a symbol name that is not yet used inside the device
    int id = 0;
    std::string name = "blockwrite_data";
    while (device.lookupSymbol(name))
      name = "blockwrite_data_" + std::to_string(id++);

    MemRefType memrefType = MemRefType::get({size}, builder.getI32Type());
    TensorType tensorType = RankedTensorType::get({size}, builder.getI32Type());
    auto global = builder.create<memref::GlobalOp>(
        loc, name, builder.getStringAttr("private"), memrefType,
        DenseElementsAttr::get<uint32_t>(tensorType, data32), true, nullptr);
    global_data.push_back(global);
  }

  // create aiex.runtime_sequence
  auto seq = builder.create<AIEX::RuntimeSequenceOp>(loc, nullptr);
  seq.getBody().push_back(new Block);

  // create the control packet ops
  builder.setInsertionPointToStart(&seq.getBody().front());

  // Emits one control-packet op writing `words` at `address`; opcode and
  // stream id are both fixed to 0.
  auto emitControlPacket = [&](uint32_t address, ArrayRef<int32_t> words) {
    builder.create<AIEX::NpuControlPacketOp>(
        loc, builder.getUI32IntegerAttr(address), nullptr,
        /*opcode*/ builder.getI32IntegerAttr(0),
        /*stream_id*/ builder.getI32IntegerAttr(0),
        DenseI32ArrayAttr::get(ctx, words));
  };

  for (auto p : llvm::zip(operations, global_data)) {
    const auto &op = std::get<0>(p);
    memref::GlobalOp payload = std::get<1>(p);

    if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE ||
        op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
      // NOTE(review): for mask writes only op.cmd.Value is forwarded, exactly
      // like a plain write; no mask is applied here. Confirm this is intended.
      const int32_t value = static_cast<int32_t>(op.cmd.Value);
      emitControlPacket(static_cast<uint32_t>(op.cmd.RegOff),
                        ArrayRef<int32_t>(value));
    } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
      auto initialValue = payload.getInitialValue();
      if (!initialValue)
        continue;
      auto blockWriteData = dyn_cast<DenseIntElementsAttr>(*initialValue);
      if (!blockWriteData) {
        payload.emitError(
            "Global symbol initial value is not a dense int array");
        // Report the failure to the caller instead of returning a truncated
        // module as if it were complete.
        module.erase();
        return std::nullopt;
      }
      auto blockWriteDataValues = blockWriteData.getValues<int32_t>();
      // Split block write data into beats of 4 or less, in int32_t.
      // The address is kept unsigned so that advancing it cannot overflow a
      // signed integer.
      uint32_t currAddr = static_cast<uint32_t>(op.cmd.RegOff);
      for (size_t i = 0; i < blockWriteDataValues.size(); i += 4) {
        auto last = std::min(blockWriteDataValues.size(), i + 4);
        SmallVector<int32_t> splitData =
            SmallVector<int32_t>(blockWriteDataValues.begin() + i,
                                 blockWriteDataValues.begin() + last);
        emitControlPacket(currAddr, ArrayRef<int32_t>(splitData));
        currAddr += splitData.size() * sizeof(int32_t);
      }
    } else {
      llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n";
      // Do not leak the partially built module on failure.
      module.erase();
      return std::nullopt;
    }
  }

  return module;
}

LogicalResult xilinx::AIE::AIETranslateToTxn(ModuleOp m,
llvm::raw_ostream &output,
llvm::StringRef workDirPath,
Expand All @@ -1208,8 +1317,27 @@ LogicalResult xilinx::AIE::AIETranslateToTxn(ModuleOp m,
auto new_module = AIETranslateBinaryToTxn(m.getContext(), bin);
if (!new_module)
return failure();

new_module->print(output);
return success();
}

/// Translate module `m` to control packets and write the result to `output`.
///
/// The module is first lowered to a transaction binary with translateToTxn.
/// When `outputBinary` is true that transaction binary is written to `output`
/// unchanged. Otherwise the binary is converted with
/// AIETranslateBinaryToCtrlpkt and the resulting module is printed to
/// `output`.
///
/// \param m            module to translate.
/// \param output       stream receiving the binary or the printed module.
/// \param workDirPath  forwarded to translateToTxn.
/// \param outputBinary write the raw transaction binary instead of MLIR.
/// \param enableSim    forwarded to translateToTxn.
/// \param xaieDebug    forwarded to translateToTxn.
/// \param enableCores  forwarded to translateToTxn.
/// \returns success, or failure if either translation step fails.
LogicalResult xilinx::AIE::AIETranslateToControlPackets(
    ModuleOp m, llvm::raw_ostream &output, llvm::StringRef workDirPath,
    bool outputBinary, bool enableSim, bool xaieDebug, bool enableCores) {
  std::vector<uint8_t> bin;
  auto result =
      translateToTxn(m, bin, workDirPath, enableSim, xaieDebug, enableCores);
  if (failed(result))
    return result;

  if (outputBinary) {
    output.write(reinterpret_cast<const char *>(bin.data()), bin.size());
    return success();
  }

  auto new_module = AIETranslateBinaryToCtrlpkt(m.getContext(), bin);
  if (!new_module)
    return failure();
  new_module->print(output);
  // The temporary module exists only to be printed; erase it so it is not
  // leaked once this function returns.
  new_module->erase();
  return success();
}
12 changes: 6 additions & 6 deletions lib/Targets/AIETargetNPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,8 @@ LogicalResult xilinx::AIE::AIETranslateToNPU(ModuleOp module,
return success();
}

LogicalResult
xilinx::AIE::AIETranslateToControlPackets(ModuleOp module,
std::vector<uint32_t> &instructions) {
LogicalResult xilinx::AIE::AIETranslateControlPacketsToUI32Vec(
ModuleOp module, std::vector<uint32_t> &instructions) {

DeviceOp deviceOp = *module.getOps<DeviceOp>().begin();
auto sequenceOps = deviceOp.getOps<AIEX::RuntimeSequenceOp>();
Expand Down Expand Up @@ -291,10 +290,11 @@ xilinx::AIE::AIETranslateToControlPackets(ModuleOp module,
return success();
}

LogicalResult xilinx::AIE::AIETranslateToControlPackets(ModuleOp module,
raw_ostream &output) {
LogicalResult
xilinx::AIE::AIETranslateControlPacketsToUI32Vec(ModuleOp module,
raw_ostream &output) {
std::vector<uint32_t> instructions;
auto r = AIETranslateToControlPackets(module, instructions);
auto r = AIETranslateControlPacketsToUI32Vec(module, instructions);
if (failed(r))
return r;
for (auto w : instructions)
Expand Down
22 changes: 20 additions & 2 deletions lib/Targets/AIETargets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,14 +375,32 @@ void registerAIETranslations() {
[](ModuleOp module, raw_ostream &output) {
if (outputBinary == true) {
std::vector<uint32_t> instructions;
auto r = AIETranslateToControlPackets(module, instructions);
auto r = AIETranslateControlPacketsToUI32Vec(module, instructions);
if (failed(r))
return r;
output.write(reinterpret_cast<const char *>(instructions.data()),
instructions.size() * sizeof(uint32_t));
return success();
}
return AIETranslateToControlPackets(module, output);
return AIETranslateControlPacketsToUI32Vec(module, output);
},
registerDialects);
// Registers the "aie-generate-ctrlpkt" translation. The callback resolves the
// working directory (the work-dir-path option when given, otherwise the
// current directory) and forwards the remaining command-line options to
// AIETranslateToControlPackets.
TranslateFromMLIRRegistration registrationCDOWithCtrlpkt(
    "aie-generate-ctrlpkt",
    "Generate control packet configuration. Use --aie-output-binary to "
    "select between mlir (default) and binary output",
    [](ModuleOp module, raw_ostream &output) {
      SmallString<128> resolvedWorkDir;
      if (workDirPath.getNumOccurrences() != 0) {
        // An explicitly supplied option takes precedence.
        resolvedWorkDir = workDirPath.getValue();
      } else if (llvm::sys::fs::current_path(resolvedWorkDir)) {
        // current_path returned an error code: no usable working directory.
        llvm::report_fatal_error("couldn't get cwd to use as work-dir-path");
      }
      LLVM_DEBUG(llvm::dbgs()
                 << "work-dir-path: " << resolvedWorkDir << "\n");
      return AIETranslateToControlPackets(module, output, resolvedWorkDir,
                                          outputBinary, cdoAieSim,
                                          cdoXaieDebug, cdoEnableCores);
    },
    registerDialects);
}
Expand Down
20 changes: 19 additions & 1 deletion python/AIEMLIRModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,23 @@ PYBIND11_MODULE(_aie, m) {
"module"_a, "output_file"_a, "work_dir_path"_a, "aiesim"_a = false,
"xaie_debug"_a = false, "enable_cores"_a = true);

// Python binding `generate_ctrlpkt(module, output_file, work_dir_path,
// aiesim=False, xaie_debug=False, enable_cores=True)`. Calls
// aieTranslateToCtrlpkt and raises ValueError, including the diagnostics
// collected while the call ran, when the translation reports failure.
m.def(
    "generate_ctrlpkt",
    [](MlirOperation op, const std::string &outputFile,
       const std::string &workDirPath, bool aieSim, bool xaieDebug,
       bool enableCores) {
      // Collect diagnostics emitted on the operation's context for the
      // duration of this call.
      mlir::python::CollectDiagnosticsToStringScope scope(
          mlirOperationGetContext(op));

      const MlirStringRef outputFileRef{outputFile.data(), outputFile.size()};
      const MlirStringRef workDirRef{workDirPath.data(), workDirPath.size()};
      const MlirLogicalResult status = aieTranslateToCtrlpkt(
          op, outputFileRef, workDirRef, aieSim, xaieDebug, enableCores);

      if (mlirLogicalResultIsFailure(status))
        throw py::value_error("Failed to generate control packets because: " +
                              scope.takeMessage());
    },
    "module"_a, "output_file"_a, "work_dir_path"_a, "aiesim"_a = false,
    "xaie_debug"_a = false, "enable_cores"_a = true);

m.def(
"transaction_binary_to_mlir",
[](MlirContext ctx, py::bytes bytes) {
Expand All @@ -156,7 +173,8 @@ PYBIND11_MODULE(_aie, m) {
m.def(
"generate_control_packets",
[&stealCStr](MlirOperation op) {
py::str ctrlPackets = stealCStr(aieTranslateToControlPackets(op));
py::str ctrlPackets =
stealCStr(AIETranslateControlPacketsToUI32Vec(op));
auto individualInstructions =
ctrlPackets.attr("split")().cast<py::list>();
for (size_t i = 0; i < individualInstructions.size(); ++i)
Expand Down
8 changes: 8 additions & 0 deletions python/compiler/aiecc/cl_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,14 @@ def parse_args(args=None):
const=True,
help="Generate txn binary for configuration",
)
# Opt-in switch: passing --aie-generate-ctrlpkt stores True under `ctrlpkt` in
# the parsed arguments; when the flag is absent the value stays False.
parser.add_argument(
"--aie-generate-ctrlpkt",
dest="ctrlpkt",
default=False,
action="store_const",
const=True,
help="Generate control packets for configuration",
)
parser.add_argument(
"--aie-generate-xclbin",
dest="xcl",
Expand Down
Loading

0 comments on commit 00bd68c

Please sign in to comment.