From 59153bfeaf606fe0e2b72a7741826ecac340b350 Mon Sep 17 00:00:00 2001 From: Jeff Fifield Date: Fri, 16 Aug 2024 13:04:27 -0600 Subject: [PATCH] Switch to txn 1.0 format (#1676) --- lib/Targets/AIETargetNPU.cpp | 67 ++++++++++---------- test/Targets/NPU/npu_blockwrite_instgen.mlir | 10 +-- test/Targets/NPU/npu_instgen.mlir | 21 ++---- 3 files changed, 40 insertions(+), 58 deletions(-) diff --git a/lib/Targets/AIETargetNPU.cpp b/lib/Targets/AIETargetNPU.cpp index a407f48451..58d8ef175e 100644 --- a/lib/Targets/AIETargetNPU.cpp +++ b/lib/Targets/AIETargetNPU.cpp @@ -69,7 +69,7 @@ void appendSync(std::vector &instructions, NpuSyncOp op) { void appendWrite32(std::vector &instructions, NpuWrite32Op op) { - auto words = reserveAndGetTail(instructions, 6); + auto words = reserveAndGetTail(instructions, 3); if (op.getBuffer()) { op.emitOpError("Cannot translate symbolic address"); @@ -78,24 +78,21 @@ void appendWrite32(std::vector &instructions, NpuWrite32Op op) { // XAIE_IO_WRITE words[0] = TXN_OPC_WRITE; - words[1] = 0; - words[2] = op.getAddress(); + words[1] = op.getAddress(); auto col = op.getColumn(); auto row = op.getRow(); if (col && row) { const AIETargetModel &tm = op->getParentOfType().getTargetModel(); - words[2] = ((*col & 0xff) << tm.getColumnShift()) | - ((*row & 0xff) << tm.getRowShift()) | (words[2] & 0xFFFFF); + words[1] = ((*col & 0xff) << tm.getColumnShift()) | + ((*row & 0xff) << tm.getRowShift()) | (words[1] & 0xFFFFF); } - words[3] = 0; - words[4] = op.getValue(); // Value - words[5] = words.size() * sizeof(uint32_t); // Operation Size + words[2] = op.getValue(); // Value } void appendMaskWrite32(std::vector &instructions, NpuMaskWrite32Op op) { - auto words = reserveAndGetTail(instructions, 7); + auto words = reserveAndGetTail(instructions, 4); if (op.getBuffer()) { op.emitOpError("Cannot translate symbolic address"); @@ -104,38 +101,33 @@ void appendMaskWrite32(std::vector &instructions, // XAIE_IO_MASKWRITE words[0] = TXN_OPC_MASKWRITE; - words[1] = 0; - words[2] = op.getAddress(); + words[1] = op.getAddress(); auto col = op.getColumn(); auto row = op.getRow(); if (col && row) { const AIETargetModel &tm = op->getParentOfType().getTargetModel(); - words[2] = ((*col & 0xff) << tm.getColumnShift()) | - ((*row & 0xff) << tm.getRowShift()) | (words[2] & 0xFFFFF); + words[1] = ((*col & 0xff) << tm.getColumnShift()) | + ((*row & 0xff) << tm.getRowShift()) | (words[1] & 0xFFFFF); } - words[3] = 0; - words[4] = op.getValue(); // Value - words[5] = op.getMask(); - words[6] = words.size() * sizeof(uint32_t); // Operation Size + words[2] = op.getValue(); // Value + words[3] = op.getMask(); } void appendAddressPatch(std::vector &instructions, NpuAddressPatchOp op) { - auto words = reserveAndGetTail(instructions, 12); + auto words = reserveAndGetTail(instructions, 6); // XAIE_IO_CUSTOM_OP_DDR_PATCH words[0] = TXN_OPC_DDR_PATCH; words[1] = words.size() * sizeof(uint32_t); // Operation Size - words[6] = op.getAddr(); - words[7] = 0; + words[2] = op.getAddr(); - words[8] = op.getArgIdx(); - words[9] = 0; + words[3] = op.getArgIdx(); - words[10] = op.getArgPlus(); - words[11] = 0; + words[4] = op.getArgPlus(); + words[5] = 0; } void appendBlockWrite(std::vector &instructions, NpuBlockWriteOp op) { @@ -172,22 +164,21 @@ void appendBlockWrite(std::vector &instructions, NpuBlockWriteOp op) { return; } - auto words = reserveAndGetTail(instructions, data.size() + 4); + auto words = reserveAndGetTail(instructions, data.size() + 3); // XAIE_IO_BLOCKWRITE words[0] = TXN_OPC_BLOCKWRITE; - words[1] = 0; - words[2] = op.getAddress(); + words[1] = op.getAddress(); auto col = op.getColumn(); auto row = op.getRow(); if (col && row) { const AIETargetModel &tm = op->getParentOfType().getTargetModel(); - words[2] = ((*col & 0xff) << tm.getColumnShift()) | - ((*row & 0xff) << tm.getRowShift()) | (words[2] & 0xFFFFF); + words[1] = ((*col & 0xff) << tm.getColumnShift()) | + ((*row & 0xff) << tm.getRowShift()) | (words[1] & 0xFFFFF); } - words[3] = words.size() * sizeof(uint32_t); // Operation Size + words[2] = words.size() * sizeof(uint32_t); // Operation Size - unsigned i = 4; + unsigned i = 3; for (auto d : data) words[i++] = d.getZExtValue(); } @@ -201,12 +192,18 @@ std::vector xilinx::AIE::AIETranslateToNPU(ModuleOp module) { auto words = reserveAndGetTail(instructions, 4); // setup txn header - words[0] = 0x06030100; - words[1] = 0x00000105; + uint8_t major = 1; + uint8_t minor = 0; + uint8_t devGen = 3; + uint8_t numRows = 6; + uint8_t numCols = 5; + uint8_t numMemTileRows = 1; + uint32_t count = 0; + words[0] = (numRows << 24) | (devGen << 16) | (minor << 8) | major; + words[1] = (numMemTileRows << 8) | numCols; DeviceOp deviceOp = *module.getOps().begin(); auto sequenceOps = deviceOp.getOps(); - int count = 0; for (auto f : sequenceOps) { Block &entry = f.getBody().front(); for (auto &o : entry) { @@ -236,7 +233,7 @@ std::vector xilinx::AIE::AIETranslateToNPU(ModuleOp module) { // write size fields of the txn header instructions[2] = count; - instructions[3] = instructions.size() * sizeof(uint32_t); + instructions[3] = instructions.size() * sizeof(uint32_t); // size of the txn return instructions; } diff --git a/test/Targets/NPU/npu_blockwrite_instgen.mlir b/test/Targets/NPU/npu_blockwrite_instgen.mlir index 244e862b68..06eceab6e3 100644 --- a/test/Targets/NPU/npu_blockwrite_instgen.mlir +++ b/test/Targets/NPU/npu_blockwrite_instgen.mlir @@ -14,10 +14,10 @@ module { aiex.runtime_sequence(%arg0: memref<16xf32>, %arg1: memref<16xf32>) { // TXN header - // CHECK: 06030100 + // CHECK: 06030001 // CHECK: 00000105 // CHECK: 00000003 - // CHECK: 00000068 + // CHECK: 00000058 %c16_i64 = arith.constant 16 : i64 %c1_i64 = arith.constant 1 : i64 @@ -25,10 +25,8 @@ module { %c64_i64 = arith.constant 64 : i64 %c0_i32 = arith.constant 0 : i32 %c1_i32 = arith.constant 1 : i32 - // CHECK: 00000001 - // CHECK: 00000000 // CHECK: 061A00C0 - // CHECK: 00000030 + // CHECK: 0000002C // CHECK: 00000001 // CHECK: 00580002 // CHECK: 000C0005 @@ -64,9 +62,7 @@ module { use_next_bd = 1 : i32, valid_bd = 1 : i32} // CHECK: 00000000 - // CHECK: 00000000 // CHECK: 06400DEF - // CHECK: 00000000 // CHECK: 00000042 aiex.npu.write32 { column = 3 : i32, row = 4 : i32, address = 0xabc00def : ui32, value = 0x42 : ui32 } diff --git a/test/Targets/NPU/npu_instgen.mlir b/test/Targets/NPU/npu_instgen.mlir index 18d8da233c..5b2b9a3ec2 100644 --- a/test/Targets/NPU/npu_instgen.mlir +++ b/test/Targets/NPU/npu_instgen.mlir @@ -14,32 +14,25 @@ module { memref.global "private" constant @write_data : memref<8xi32> = dense<[100, 101, 102, 103, 104 ,105, 106, 107]> aiex.runtime_sequence(%arg0: memref<16xf32>, %arg1: memref<16xf32>) { - // TXN header - // CHECK: 06030100 + // TXN header 1.0 + // CHECK: 06030001 // CHECK: 00000105 // CHECK: 00000006 - // CHECK: 000000CC + // CHECK: 000000A0 - // CHECK: 00000000 // CHECK: 00000000 // CHECK: 06400DEF - // CHECK: 00000000 // CHECK: 00000042 - // CHECK: 00000018 aiex.npu.write32 { column = 3 : i32, row = 4 : i32, address = 0xabc00def : ui32, value = 0x42 : ui32 } - // CHECK: 00000000 // CHECK: 00000000 // CHECK: ABC00DEF - // CHECK: 00000000 // CHECK: 00000314 - // CHECK: 00000018 aiex.npu.write32 { address = 0xabc00def : ui32, value = 0x314 : ui32 } // CHECK: 00000001 - // CHECK: 00000000 // CHECK: 12345679 - // CHECK: 00000030 + // CHECK: 0000002C // CHECK: 00000064 // CHECK: 00000065 // CHECK: 00000066 @@ -52,9 +45,8 @@ module { aiex.npu.blockwrite (%0) {address = 0x12345679 : ui32} : memref<8xi32> // CHECK: 00000001 - // CHECK: 00000000 // CHECK: 02100064 - // CHECK: 00000030 + // CHECK: 0000002C // CHECK: 00000064 // CHECK: 00000065 // CHECK: 00000066 @@ -66,12 +58,9 @@ module { aiex.npu.blockwrite (%0) { column = 1 : i32, row = 1 : i32, address = 100 : ui32} : memref<8xi32> // CHECK: 00000003 - // CHECK: 00000000 // CHECK: 0430567A - // CHECK: 00000000 // CHECK: 00001001 // CHECK: F00FF00F - // CHECK: 0000001C aiex.npu.maskwrite32 { column = 2 : i32, row = 3 : i32, address = 0x0000567A : ui32, value = 0x1001 : ui32, mask = 0xf00ff00f : ui32 } // CHECK: 00000080