diff --git a/include/aie/Dialect/AIE/IR/AIEInterfaces.td b/include/aie/Dialect/AIE/IR/AIEInterfaces.td index b31e8e9be9..90cc7d762c 100644 --- a/include/aie/Dialect/AIE/IR/AIEInterfaces.td +++ b/include/aie/Dialect/AIE/IR/AIEInterfaces.td @@ -133,12 +133,7 @@ def AIETarget : OpInterface<"AIETarget"> { ]; } -// def OffloadingTranslationAttrTrait : -// NativeTrait<"OffloadingTranslationAttrTrait", ""> { -// let cppNamespace = "::mlir::gpu"; -// } - -def MyOffsetSizeAndStrideOpInterface: OpInterfaceTrait<"::xilinx::AIE::MyOffsetSizeAndStrideOpInterface"> { -} +// Don't delete - see AIEDialect::myVerifyOffsetSizeAndStrideOp +def MyOffsetSizeAndStrideOpInterface: OpInterfaceTrait<"::xilinx::AIE::MyOffsetSizeAndStrideOpInterface"> {} #endif // AIE_INTERFACES \ No newline at end of file diff --git a/include/aie/Dialect/AIE/IR/AIEOps.td b/include/aie/Dialect/AIE/IR/AIEOps.td index 41affdce1a..627eb362cc 100644 --- a/include/aie/Dialect/AIE/IR/AIEOps.td +++ b/include/aie/Dialect/AIE/IR/AIEOps.td @@ -745,46 +745,67 @@ def AIE_DMABDPACKETOp: AIE_Op<"dma_bd_packet", []> { }]; } -def AIE_DMABDOp: AIE_Op<"dma_bd", []> { - let summary = "Declare a dma block descriptor op"; +def AIE_DMABDOp: AIE_Op<"dma_bd", [ + ParentOneOf<["MemOp", "MemTileDMAOp", "ShimDMAOp", "DMAOp"]>, + ]> { + let summary = "Declare a dma buffer descriptor op"; let description = [{ - This operation describes a block descriptor for DMA operations. In particular, it specifies - what buffer addresss to use, the transfer length, and the buffer type (A or B). + This operation describes a buffer descriptor for DMA operations. In particular, it specifies + what buffer to use, and optionally: - This operation must be used in an MLIR block that lives inside a MemOp's region. - The block descriptor specifies what lock to use and the buffer configuration. + 1. the offset into the buffer; + 2. the transfer length; + 3. the sizes and strides for n-d tensor addressing (described below); + 4. 
the "bd_id" with which to associate the buffer descriptor (most often left empty). + + `offset`, `len`, `size`s and `stride`s are all denominated in element width; e.g., transferring the whole of + `memref<512xi32>` means `len == 512`, and transferring the whole of `memref<512xi16>` likewise means `len == 512`. + + The only caveat to this "everything-is-in-terms-of-element-width" rule is regarding the inner-most dimension's stride + (see [Important gotcha regarding strides](#important-gotcha-regarding-strides) below). + + `dma_bd` ops must appear in their own BBs (basic blocks) and such BBs can (optionally) include `use_lock` + operations (specifying an "acquire" and a "release" lock; see the `use_lock` operation) and subsequent BDs in + a "chain" of BDs (using `next_bd` as a "jump" to the next BB which contains a `dma_bd`). Example: ``` // this defines a BD that uses lock %lck0 and buffer %buf0 ^bd5: aie.use_lock(%lck, "Acquire", 0) - aie.dma_bd(<$buf0 : memref<512xi32>, 0, 512>, 1) + // transfer the first 32 elements of the memref + aie.dma_bd(%buf0 : memref<128xi32>, 0, 32) aie.use_lock(%lck, "Release", 1) - br ^bd6 // point to the next Block, which is also a different Block Descriptor + aie.next_bd ^bd6 // point to the next bb, which describes the next buffer descriptor + ^bd6: + aie.use_lock(%lck, "Acquire", 1) + // transfer the last 32 elements of the memref + aie.dma_bd(%buf1 : memref<128xi32>, 96, 32) + aie.use_lock(%lck, "Release", 0) + aie.next_bd ^end ... // this defines a BD that does not use any lock ^bd8: - aie.dma_bd(<$buf1 : memref<64xi32>, 0, 64>, 0) + aie.dma_bd(%buf2 : memref<64xi32>, 0, 64) ``` - A DMA channel in a Memory Module can process one block descriptor after another by chaining them. - There are 16 block descriptors per Memory Module. They are shared by four DMA channels. + + ## Background/context: + + A DMA channel in a Memory Module can process one buffer descriptor after another by chaining them.
+ There are 16 buffer descriptors per Core memory module and 48 buffer descriptors per Memtile memory module. + They are shared by four DMA channels (or 12 in the Memtile case). ## DMA Data Layout Transformations on AIE-ML Devices AIE-ML devices can apply data layout transformations at the buffer descriptor level. These transformation are described by strides and sizes in up to three dimensions (four dimensions on memtiles). Strides and sizes can be supplied to the `dma_bd` - through an optional argument, an array of tuples ``. - - The first element of this array gives the _highest-dimension_ stride and - size, the last element of the array gives the lowest-dimension. - - Strides are always expressed in units of `i32`s; this is an architectural - requirement, as data is moved by the DMA at this fundamental size. + through an optional argument, an array of "tuple-like" attributes `bd_dim_layout`. + The first element of this array gives the outer-most dimension's stride and + size, the last element of the array gives the inner-most dimension's stride and size. We can model the access pattern strides and sizes generate by a series of nested loops. In general, a set of strides and sizes like this... @@ -820,11 +841,18 @@ def AIE_DMABDOp: AIE_Op<"dma_bd", []> { for(int k = 0; k < 8 /*size_0*/; k++) // access/store element at/to index (i * 16 /*stride_2*/ + j * 1 /*stride_1*/ + k * 2 /*stride_0*/) ``` + + ## Important gotcha regarding strides + + All strides are expressed in multiples of the element width (just like `len` and `offset`) + **with the caveat that, for element types narrower than 32 bits, the inner-most dimension's stride must be 1**.
}]; let arguments = ( ins AnyMemRef:$buffer, - OptionalAttr:$offset, + // in multiples of element width (not bytes) + DefaultValuedOptionalAttr:$offset, + // in multiples of element width (not bytes) OptionalAttr:$len, OptionalAttr:$dimensions, OptionalAttr:$bd_id, @@ -840,8 +868,16 @@ def AIE_DMABDOp: AIE_Op<"dma_bd", []> { let extraClassDeclaration = [{ BufferOp getBufferOp(); - int getOffsetValue() { return getOffset().value_or(0); } - int getLenValue() { return getLen().value_or(getBuffer().getType().getNumElements()); } + int32_t getBufferElementTypeWidthInBytes() { + return getBuffer().getType().getElementTypeBitWidth() / 8; + } + int32_t getLenInBytes() { + if (std::optional len = getLen(); len.has_value()) + return len.value() * getBufferElementTypeWidthInBytes(); + else + return getBuffer().getType().getNumElements() * getBufferElementTypeWidthInBytes(); + } + int32_t getOffsetInBytes() { return getOffset() * getBufferElementTypeWidthInBytes(); } }]; let hasVerifier = 1; diff --git a/lib/Dialect/AIE/IR/AIEDialect.cpp b/lib/Dialect/AIE/IR/AIEDialect.cpp index 7ce933fba6..5bfeb2bbcc 100644 --- a/lib/Dialect/AIE/IR/AIEDialect.cpp +++ b/lib/Dialect/AIE/IR/AIEDialect.cpp @@ -1545,60 +1545,79 @@ LogicalResult DMABDOp::verify() { if (!isa(getBuffer().getDefiningOp())) return emitOpError( "BDs only support BufferOp or ExternalBufferOp operands."); - if (auto memOp = getOperation()->getParentOfType()) { - if (auto bufferOp = getBufferOp(); - bufferOp.getTileOp().colIndex() != memOp.colIndex() || - bufferOp.getTileOp().rowIndex() != memOp.rowIndex()) - return emitOpError("can only access a buffer in the same tile."); - } - // The following checks only apply if non-default strides/wraps are defined. - if (getDimensions()) { - MemRefType buffer = getBuffer().getType(); - // We are not restrictive about the type of the memref used as the input - // to the DMABD when used with multi-dimensional strides/wraps. 
Since the - // BD will use the memref as a base address and copy from it in 32 bit - // chunks, while assuming the layout of the memref is contiguous. We - // assume the user/compiler understands and accounts for this. - uint64_t memrefSize = 1; // in bytes - uint64_t maxIdx = 0; - for (int64_t memrefDim : buffer.getShape()) - memrefSize *= 4 * memrefDim; - - ArrayRef dims = *getDimensions(); + if (getLenInBytes() % 4) + return emitOpError("transfer length must be multiple of 4 (i.e., represent " + "4 byte aligned address)"); + + TileID parentTileId = getParentTileElement(getOperation()).getTileID(); + + if (getOperation()->getParentOfType() && + (getBufferOp().getTileOp().colIndex() != parentTileId.col || + getBufferOp().getTileOp().rowIndex() != parentTileId.row)) + return emitOpError( + "Core tile DMAs can only access a buffer in the same tile."); + + const AIETargetModel &targetModel = getTargetModel(getOperation()); + + uint32_t maxBds = targetModel.getNumBDs(parentTileId.col, parentTileId.row); + if (std::optional bdId = getBdId(); + bdId.has_value() && static_cast(*bdId) >= maxBds) + return emitOpError("bdId attribute exceeds max: ") << maxBds - 1; + if (std::optional nextBdId = getNextBdId(); + nextBdId.has_value() && static_cast(*nextBdId) >= maxBds) + return emitOpError("nextBdId attribute exceeds max: ") << maxBds - 1; + if (auto dims = getDimensions(); dims.has_value()) { size_t maxNDims = 3; if (isa_and_nonnull(getOperation()->getParentOp())) maxNDims = 4; - - if (dims.size() > maxNDims) + if (dims->size() > maxNDims) return emitOpError() << "Cannot give more than " << std::to_string(maxNDims) << " dimensions for step sizes and wraps in this " " tile (got " - << std::to_string(dims.size()) << " dimensions)."; + << std::to_string(dims->size()) << " dimensions)."; - for (BDDimLayoutAttr dim : dims) { + MemRefType buffer = getBuffer().getType(); + int64_t maxIdx = 0; + for (BDDimLayoutAttr dim : *dims) { maxIdx += dim.getStride() * (dim.getSize() - 1); if 
(0 == dim.getStride()) return emitOpError() << "Invalid step size; must be a positive integer."; - if (dim.getStride() > memrefSize) + if (dim.getStride() > buffer.getNumElements()) return emitOpError() - << "Step size " << std::to_string(dim.getStride() * 4) << " " - << "bytes exceeds memref size " << std::to_string(memrefSize); + << "Step size " << std::to_string(dim.getStride()) << " " + << "exceeds memref size " + << std::to_string(buffer.getNumElements()); if (dim.getSize() >= (1UL << 9) + 1) return emitOpError() << "Size may not exceed 1023."; if (dim.getStride() >= (1UL << 19)) return emitOpError() << "Stride may not exceed " << (1 << 20); } - if (memrefSize <= 4 * maxIdx) + if (buffer.getNumElements() <= maxIdx) return emitOpError() << "Specified stride(s) and size(s) result in out " "of bounds access in buffer, for index " - << std::to_string(maxIdx) << ", accessing at " - << std::to_string(4 * maxIdx) - << " byte offset in memref of length " - << std::to_string(memrefSize) << "."; + << std::to_string(maxIdx) << " in memref of length " + << std::to_string(buffer.getNumElements()) << "."; + + // Since streams read 32b words, there's no way to read eg 16b with stride + // of 2 (ie lower halfs of each 32b). So force it to be 1 (and then in + // CDODirect/XAIEV2 scale the size by 4/getBufferElementTypeWidthInBytes). 
+ if (getBufferElementTypeWidthInBytes() < 4 && dims->back().getStride() != 1) + return emitOpError( + "For <32b width datatypes, inner-most dim stride must be 1"); + } + if (targetModel.isMemTile(parentTileId.col, parentTileId.row) || + targetModel.isCoreTile(parentTileId.col, parentTileId.row)) { + if (auto baseAddr = getBufferOp().getAddress(); baseAddr.has_value()) { + int offsetInBytes = *baseAddr + getOffsetInBytes(); + if (offsetInBytes % 4) + return emitOpError( + "bd address must be 4 byte (32b) aligned; got base+offset: ") + << offsetInBytes << " (bytes)"; + } } if (!getLen() && !getBuffer().getType().hasStaticShape()) diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index 7731f03c47..46741dab04 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -204,14 +204,6 @@ struct AIEObjectFifoStatefulTransformPass return !hasSharedMemory || atLeastOneConsumerWantsTransform; } - /// Function to multiply all dimensions of a memref. - int64_t getMemrefTypeSize(MemRefType memref) { - int64_t size = 1; - for (auto dim : memref.getShape()) - size *= dim; - return size; - } - /// Function to retrieve ObjectFifoLinkOp of ObjectFifoCreateOp, /// if it belongs to one. 
std::optional getOptionalLinkOp(ObjectFifoCreateOp op) { @@ -325,14 +317,14 @@ struct AIEObjectFifoStatefulTransformPass .getElemType() .cast(); auto elemInType = fifoInType.getElementType().cast(); - int inSize = getMemrefTypeSize(elemInType); + int inSize = elemInType.getNumElements(); auto fifoOutType = linkOp->getOutputObjectFifos()[0] .getElemType() .cast(); auto elemOutType = fifoOutType.getElementType().cast(); - if (int outSize = getMemrefTypeSize(elemOutType); inSize >= outSize) { + if (int outSize = elemOutType.getNumElements(); inSize >= outSize) { if (op.name() != fifoIn.name()) return; } else { @@ -474,11 +466,10 @@ struct AIEObjectFifoStatefulTransformPass int acqNum = 1; int relNum = 1; - int offset = 0; auto fifo = op.getElemType().cast(); auto elemType = fifo.getElementType().cast(); - int len = getMemrefTypeSize(elemType); + int len = elemType.getNumElements(); // search for the buffers/locks (based on if this objFifo has a link) ObjectFifoCreateOp target = op; @@ -539,8 +530,8 @@ struct AIEObjectFifoStatefulTransformPass builder.setInsertionPointToStart(curr); createBdBlock(builder, target, lockMode, acqNum, relNum, - buffersPerFifo[target][blockIndex], offset, len, - channelDir, blockIndex, succ, dims); + buffersPerFifo[target][blockIndex], /*offset*/ 0, + len, channelDir, blockIndex, succ, dims); curr = succ; blockIndex++; } @@ -558,7 +549,6 @@ struct AIEObjectFifoStatefulTransformPass int acqNum = 1; int relNum = 1; - int offset = 0; // search for ShimDMAOp Operation *producerDMA = nullptr; @@ -612,12 +602,12 @@ struct AIEObjectFifoStatefulTransformPass succ = builder.createBlock(endBlock); MemRefType buffer = externalBuffersPerFifo[op][blockIndex].getType(); - int len = getMemrefTypeSize(buffer); + int len = buffer.getNumElements(); builder.setInsertionPointToStart(curr); createBdBlock(builder, op, lockMode, acqNum, relNum, externalBuffersPerFifo[op][blockIndex], - offset, len, channelDir, blockIndex, succ, - dims); + /*offset*/ 0, len, 
channelDir, blockIndex, + succ, dims); curr = succ; blockIndex++; } @@ -633,11 +623,9 @@ struct AIEObjectFifoStatefulTransformPass if (numBlocks == 0) return; - int offset = 0; auto fifo = op.getElemType().cast(); auto elemType = fifo.getElementType().cast(); - int lenOut = getMemrefTypeSize(elemType); - int bytes = elemType.getElementTypeBitWidth() / 8; + int lenOut = elemType.getNumElements(); int acqNum = 1; int relNum = 1; @@ -663,7 +651,7 @@ struct AIEObjectFifoStatefulTransformPass auto elemType = fifoType.getElementType().cast(); if (fifoIn.name() == op.name()) break; - extraOffset += getMemrefTypeSize(elemType); + extraOffset += elemType.getNumElements(); } } } else if (linkOp->isDistribute()) { @@ -678,7 +666,7 @@ struct AIEObjectFifoStatefulTransformPass auto elemType = fifoType.getElementType().cast(); if (fifoOut.name() == op.name()) break; - extraOffset += getMemrefTypeSize(elemType); + extraOffset += elemType.getNumElements(); } } } else { @@ -686,7 +674,7 @@ struct AIEObjectFifoStatefulTransformPass auto targetFifo = target.getElemType().cast(); auto targetElemType = targetFifo.getElementType().cast(); - lenOut = getMemrefTypeSize(targetElemType); + lenOut = targetElemType.getNumElements(); } } @@ -748,8 +736,9 @@ struct AIEObjectFifoStatefulTransformPass succ = builder.createBlock(endBlock); builder.setInsertionPointToStart(curr); + int offset = 0; if (isDistribute || isJoin) - offset = extraOffset * bytes; + offset = extraOffset; createBdBlock(builder, target, lockMode, acqNum, relNum, buffersPerFifo[target][blockIndex], offset, lenOut, channelDir, blockIndex, succ, dims); diff --git a/lib/Targets/AIETargetAirbin.cpp b/lib/Targets/AIETargetAirbin.cpp index 6c78a00ed3..6f99031283 100644 --- a/lib/Targets/AIETargetAirbin.cpp +++ b/lib/Targets/AIETargetAirbin.cpp @@ -595,14 +595,12 @@ static BDInfo getBDInfo(Block &block) { BDInfo bdInfo; for (auto op : block.getOps()) { bdInfo.foundBD = true; - auto bufferType = 
op.getBuffer().getType().cast<::mlir::MemRefType>(); - assert(op.getBufferOp().getAddress().has_value() && "buffer op should have address"); bdInfo.baseAddrA = op.getBufferOp().getAddress().value(); - bdInfo.lenA = op.getLenValue(); - bdInfo.bytesA = bufferType.getElementTypeBitWidth() / 8u; - bdInfo.offsetA = op.getOffsetValue(); + bdInfo.lenA = op.getLenInBytes(); + bdInfo.bytesA = op.getBufferElementTypeWidthInBytes(); + bdInfo.offsetA = op.getOffsetInBytes(); bdInfo.bufA = "XAIEDMA_TILE_BD_ADDRA"; bdInfo.hasA = true; } diff --git a/lib/Targets/AIETargetCDODirect.cpp b/lib/Targets/AIETargetCDODirect.cpp index 5d7583c859..0a44226131 100644 --- a/lib/Targets/AIETargetCDODirect.cpp +++ b/lib/Targets/AIETargetCDODirect.cpp @@ -273,48 +273,57 @@ LogicalResult configureBdInBlock(XAie_DevInst &devInst, XAie_DmaDesc &dmaTileBd, qOs, cache, secure); } - // deref here because this is a const iter and the various getters below - // aren't const (even though they probably should be...) // StringRef FifoMode = disable; // FIXME: when to enable FIFO mode? 
- ShapedType bufferType = bdOp.getBuffer().getType().cast<::mlir::MemRefType>(); - int bytes = bufferType.getElementTypeBitWidth() / 8; int baseAddr = 0; if (!targetModel.isShimNOCTile(tileLoc.Col, tileLoc.Row)) { auto bufferOp = cast(bdOp.getBuffer().getDefiningOp()); - assert(bufferOp.getAddress().has_value() && "buffer must have address"); + if (!bufferOp.getAddress()) + return bufferOp.emitError("buffer must have address assigned"); baseAddr = bufferOp.getAddress().value(); if (targetModel.isMemTile(tileLoc.Col, tileLoc.Row)) baseAddr += BASE_ADDR_A_INCR; } std::optional> dims = bdOp.getDimensions(); - int lenInBytes = bdOp.getLenValue() * bytes; - int basePlusOffset = baseAddr + bdOp.getOffsetValue(); + int lenInBytes = bdOp.getLenInBytes(); + int basePlusOffsetInBytes = baseAddr + bdOp.getOffsetInBytes(); if (!dims) { TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetAddrLen, &dmaTileBd, - basePlusOffset, lenInBytes); + basePlusOffsetInBytes, lenInBytes); } else { XAie_DmaTensor dmaTileBdTensor = {}; dmaTileBdTensor.NumDim = dims->size(); dmaTileBdTensor.Dim = static_cast( - calloc(dims->size(), sizeof(XAie_DmaDimDesc))); + calloc(dmaTileBdTensor.NumDim, sizeof(XAie_DmaDimDesc))); if (!dmaTileBdTensor.Dim) return bdOp.emitError("couldn't allocate array of XAie_DmaDimDesc"); - // TODO(max): rethink this? + // libxaie requires stride in multiples of 32b + double elementWidthIn32bWords = + static_cast(bdOp.getBufferElementTypeWidthInBytes()) / 4.0; for (size_t i = 0; i < dims->size(); i++) { // Pass down dimensions in reverse order; in the MLIR, this allows // us to specify step sizes/wraps in the same order as we would // access a multi-dim C array, with the highest dimension first. 
int j = dims->size() - i - 1; - // Assume AIE-ML architecture; we assert this above - // TODO(max): no we don't - dmaTileBdTensor.Dim[j].AieMlDimDesc = {dims.value()[i].getStride(), - dims.value()[i].getSize()}; + uint16_t size; + uint32_t stride; + if (j > 0) { + stride = static_cast(dims.value()[i].getStride() * + elementWidthIn32bWords); + size = dims.value()[i].getSize(); + } else { + stride = dims.value()[i].getStride(); + size = static_cast(dims.value()[i].getSize() * + elementWidthIn32bWords); + } + stride = stride > 0 ? stride : 1; + // Assume AIE-ML architecture (ie use AieMlDimDesc instead of AieDimDesc); + // asserted in AIETranslateToCDODirect). + dmaTileBdTensor.Dim[j].AieMlDimDesc = {stride, size}; } - // TODO: Probably need special handling for NOC - // TODO: Might need to adjust step sizes / wraps by -1 TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetMultiDimAddr, &dmaTileBd, - &dmaTileBdTensor, basePlusOffset, lenInBytes); + &dmaTileBdTensor, basePlusOffsetInBytes, + lenInBytes); } if (nextBdId) { @@ -324,8 +333,9 @@ LogicalResult configureBdInBlock(XAie_DevInst &devInst, XAie_DmaDesc &dmaTileBd, } if (packetID) { - assert(packetType && "must have packetType with packetID"); - if (bdOp.getLenValue() == 0) + if (!packetType) + bdOp.emitError("must have packetType with packetID"); + if (bdOp.getLen() == 0) return bdOp.emitOpError( "For MM2S channels, if Buffer_Length=0 then Enable_Packet must be " "set to 0, otherwise behavior is undefined (3.7.8 arch spec)"); diff --git a/lib/Targets/AIETargetShared.cpp b/lib/Targets/AIETargetShared.cpp index e9cee214d3..d8e494e84e 100644 --- a/lib/Targets/AIETargetShared.cpp +++ b/lib/Targets/AIETargetShared.cpp @@ -86,8 +86,12 @@ static std::string tileDMATensorStr(int col, int row, int bdNum) { void generateXAieDmaSetMultiDimAddr(raw_ostream &output, int ndims, ArrayRef dims, int col, int row, int bdNum, int baseAddrA, - int offsetA, int lenA, int bytesA, + int offsetA, int lenA, + int elementWidthInBytes, const char 
*errorRetval) { + // libxaie requires stride in multiples of 32b + double elementWidthIn32bWords = + static_cast(elementWidthInBytes) / 4.0; std::string tensor = tileDMATensorStr(col, row, bdNum); output << "XAie_DmaTensor " << tensor << " = {};\n"; output << tensor << ".NumDim = " << std::to_string(ndims) << ";\n"; @@ -98,23 +102,34 @@ void generateXAieDmaSetMultiDimAddr(raw_ostream &output, int ndims, output << "if(NULL == " << tensor << ".Dim){\n" << " return " << errorRetval << ";\n" << "}\n"; - for (int i = 0; i < ndims; i++) { + for (size_t i = 0; i < dims.size(); i++) { + uint16_t size; + uint32_t stride; // Pass down dimensions in reverse order; in the MLIR, this allows us // to specify strides/sizes in the same order as we would access a // multi-dim C array, with the highest dimension first. - int j = ndims - i - 1; + int j = dims.size() - i - 1; + if (j > 0) { + stride = + static_cast(dims[i].getStride() * elementWidthIn32bWords); + size = dims[i].getSize(); + } else { + stride = dims[i].getStride(); + size = static_cast(dims[i].getSize() * elementWidthIn32bWords); + } + stride = stride > 0 ? 
stride : 1; // Assume AIE-ML architecture; we assert this above output << tensor << ".Dim[" << std::to_string(j) << "].AieMlDimDesc" - << " = { /* StepSize */ " << std::to_string(dims[i].getStride()) - << ", /* Size */ " << std::to_string(dims[i].getSize()) << "};\n"; + << " = { /* Stride */ " << std::to_string(stride) << ", /* Size */ " + << std::to_string(size) << "};\n"; } + if ((baseAddrA + offsetA) % 4) + llvm::report_fatal_error("bd address must be 4B (32b) aligned"); output << "__mlir_aie_try(XAie_DmaSetMultiDimAddr(" << tileDMAInstRefStr(col, row, bdNum) << ", " << "&" << tensor << ", " << "0x" << llvm::utohexstr(baseAddrA + offsetA) << ", " - << " /* len */ " << lenA << " * " << bytesA << "));\n"; - // TODO: Probably need special handling for NOC - // TODO: Might need to adjust strides / sizes by -1 + << " /* len */ " << lenA << "));\n"; } } // namespace xilinx::AIE diff --git a/lib/Targets/AIETargetShared.h b/lib/Targets/AIETargetShared.h index f1a86af236..7c8e0977d8 100644 --- a/lib/Targets/AIETargetShared.h +++ b/lib/Targets/AIETargetShared.h @@ -34,7 +34,8 @@ std::string packetStr(int id, int type); void generateXAieDmaSetMultiDimAddr(llvm::raw_ostream &output, int ndims, llvm::ArrayRef dims, int col, int row, int bdNum, int baseAddrA, - int offsetA, int lenA, int bytesA, + int offsetA, int lenA, + int elementWidthInBytes, const char *errorRet); } // namespace AIE diff --git a/lib/Targets/AIETargetXAIEV2.cpp b/lib/Targets/AIETargetXAIEV2.cpp index c6fe403f83..a07dfc69d4 100644 --- a/lib/Targets/AIETargetXAIEV2.cpp +++ b/lib/Targets/AIETargetXAIEV2.cpp @@ -89,20 +89,14 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output, int packetID = 0; bool foundBd = false; int lenA = 0; - int lenB = 0; - int bytesA = 0; - int bytesB = 0; int offsetA = 0; int BaseAddrA = 0; - bool hasA = false; - bool hasB = false; + int elementWidthInBytes = 0; int ndims = 0; ArrayRef dims; // StringRef FifoMode = disable; // FIXME: when to enable FIFO mode? 
for (auto op : block.template getOps()) { foundBd = true; - ShapedType bufferType = - op.getBuffer().getType().template cast<::mlir::MemRefType>(); if (!targetModel.isShimNOCTile(col, row)) { assert(op.getBufferOp().getAddress() && "buffer must have address assigned"); @@ -121,11 +115,9 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output, } } - lenA = op.getLenValue(); - bytesA = bufferType.getElementTypeBitWidth() / 8; - offsetA = op.getOffsetValue() * bytesA; - hasA = true; - + lenA = op.getLenInBytes(); + offsetA = op.getOffsetInBytes(); + elementWidthInBytes = op.getBufferElementTypeWidthInBytes(); if (op.getDimensions()) { dims = *op.getDimensions(); ndims = dims.size(); @@ -137,13 +129,6 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output, "buffer descriptor. This is currently only " "supported for AIE-ML devices."); - if (hasA && hasB) { - if (lenA != lenB) - llvm::errs() << "ABmode must have matching lengths.\n"; - if (bytesA != bytesB) - llvm::errs() << "ABmode must have matching element data types.\n"; - } - int acqValue = 0, relValue = 0; bool hasAcq = false, hasRel = false; int acqLockID = 0, relLockID = 0; @@ -186,8 +171,6 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output, int bdNum = blockMap[&block]; if (foundBd) { - // TODO AB mode separated - // TODO For now, we are going to name each dma desc with loc and bd // which we assume is unique. 
This is strictly not enforced but in // practice, this is true @@ -214,7 +197,7 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output, << tileDMAInstRefStr(col, row, bdNum) << ", /* addrA */ " << "mlir_aie_external_get_addr_myBuffer_" << col << row << "_" << bdNum << "(), " - << " /* len */ " << lenA << " * " << bytesA << "));\n"; + << " /* len */ " << lenA << "));\n"; output << "__mlir_aie_try(XAie_DmaSetAxi(" << tileDMAInstRefStr(col, row, bdNum) << ", " << "/* smid */ 0, " @@ -222,14 +205,18 @@ mlir::LogicalResult generateDMAConfig(OpType memOp, raw_ostream &output, << "/* QoS */ 0, " << "/* Cache */ 0, " << "/* Secure */ " << enable << "));\n"; - } else + } else { + if ((BaseAddrA + offsetA) % 4) + return memOp.emitError("bd address must be 4B (32b) aligned"); output << "__mlir_aie_try(XAie_DmaSetAddrLen(" << tileDMAInstRefStr(col, row, bdNum) << ", /* addrA */ " << "0x" << llvm::utohexstr(BaseAddrA + offsetA) << ", " - << " /* len */ " << lenA << " * " << bytesA << "));\n"; + << " /* len */ " << lenA << "));\n"; + } } else generateXAieDmaSetMultiDimAddr(output, ndims, dims, col, row, bdNum, - BaseAddrA, offsetA, lenA, bytesA, "1"); + BaseAddrA, offsetA, lenA, + elementWidthInBytes, "1"); if (block.getNumSuccessors() > 0) { Block *nextBlock = block.getSuccessors()[0]; // should have only one @@ -560,7 +547,7 @@ mlir::LogicalResult AIETranslateToXAIEV2(ModuleOp module, raw_ostream &output) { blockMap[&block] = bdNum; uint64_t offset = 0; for (auto op : block.getOps()) { - offset = op.getOffsetValue(); + offset = op.getOffsetInBytes(); auto buffer = cast(op.getBuffer().getDefiningOp()); @@ -809,7 +796,7 @@ mlir::LogicalResult AIETranslateToXAIEV2(ModuleOp module, raw_ostream &output) { int row = coord.row; auto loc = tileLocStr(col, row); - auto bufferAccessor = [&](std::optional tile, BufferOp buf) { + auto bufferAccessor = [&](BufferOp buf) { // int32_t mlir_aie_read_buffer_a13(int index) { // void mlir_aie_write_buffer_a13(int index, 
int32_t value) { std::string bufName(buf.name().getValue()); @@ -866,7 +853,7 @@ mlir::LogicalResult AIETranslateToXAIEV2(ModuleOp module, raw_ostream &output) { // if(tiles.count(tile.getValue())) for (auto buf : buffers[tileOp]) - bufferAccessor(coord, buf); + bufferAccessor(buf); } auto lockAccessor = [&](LockOp lock) { diff --git a/lib/Targets/CMakeLists.txt b/lib/Targets/CMakeLists.txt index af6556f80e..c884ebc32c 100644 --- a/lib/Targets/CMakeLists.txt +++ b/lib/Targets/CMakeLists.txt @@ -7,9 +7,7 @@ add_subdirectory(AIEVecToCpp) -set(LLVM_OPTIONAL_SOURCES AIETargetAirbin.cpp) - -set(_sources +add_mlir_library(AIETargets AIETargets.cpp AIETargetBCF.cpp AIETargetCDODirect.cpp @@ -21,15 +19,8 @@ set(_sources ADFGenerateCppGraph.cpp AIEFlowsToJSON.cpp AIELLVMLink.cpp -) - -if(AIE_ENABLE_AIRBIN) - list(APPEND _sources AIETargetAirbin.cpp) -endif() - -add_mlir_library(AIETargets - ${_sources} + PARTIAL_SOURCES_INTENDED ENABLE_AGGREGATION ADDITIONAL_HEADER_DIRS @@ -55,6 +46,26 @@ add_mlir_library(AIETargets ADF ) +if(AIE_ENABLE_AIRBIN) + add_mlir_library(AIETargetAirbin + AIETargetAirbin.cpp + + PARTIAL_SOURCES_INTENDED + + LINK_COMPONENTS + Support + + LINK_LIBS PRIVATE + elf + + LINK_LIBS PUBLIC + AIE + AIEX + ) + target_link_libraries(AIETargets PUBLIC AIETargetAirbin) + target_compile_definitions(obj.AIETargets PRIVATE AIE_ENABLE_AIRBIN) +endif() + target_link_libraries(AIETargets PRIVATE xaienginecdo_static) add_dependencies(obj.AIETargets xaienginecdo_static xaienginecdo_static-headers) # for #include @@ -62,7 +73,3 @@ set(BOOTGEN_SOURCE_DIR ${PROJECT_SOURCE_DIR}/third_party/bootgen) target_include_directories(AIETargets SYSTEM PRIVATE ${BOOTGEN_SOURCE_DIR}) target_include_directories(obj.AIETargets SYSTEM PRIVATE ${BOOTGEN_SOURCE_DIR}) -if(AIE_ENABLE_AIRBIN) - target_link_libraries(AIETargets PRIVATE elf) - target_compile_definitions(obj.AIETargets PRIVATE AIE_ENABLE_AIRBIN) -endif() diff --git a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py 
b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py index aa82582442..46973f90a4 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py @@ -91,10 +91,10 @@ def device_body(): 2, memref_a_ty, [ - (m // r, r * k * word_size_in // 4), - (k // s, s * word_size_in // 4), - (r, k * word_size_in // 4), - (s * word_size_in // 4, 1), + (m // r, r * k), + (k // s, s), + (r, k), + (s, 1), ], ) object_fifo_link(inA, memA) @@ -108,10 +108,10 @@ def device_body(): 2, memref_b_ty, [ - (k // s, s * n * word_size_in // 4), - (n // t, t * word_size_in // 4), - (s, n * word_size_in // 4), - (t * word_size_in // 4, 1), + (k // s, s * n), + (n // t, t), + (s, n), + (t, 1), ], ) object_fifo_link(inB, memB) @@ -125,10 +125,10 @@ def device_body(): 2, memref_c_ty, [ - (m // r, r * n * word_size_out // 4), - (r, t * word_size_out // 4), - (n // t, r * t * word_size_out // 4), - (t * word_size_out // 4, 1), + (m // r, r * n), + (r, t), + (n // t, r * t), + (t, 1), ], ) object_fifo_link(memC, outC) diff --git a/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py index 7ced36a8ed..69a3c52394 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py @@ -197,10 +197,10 @@ def device_body(): 2, memRef_A_ty, [ - (m // r, r * k * word_size_in // 4), - (k // s, s * word_size_in // 4), - (r, k * word_size_in // 4), - (s * word_size_in // 4, 1), + (m // r, r * k), + (k // s, s), + (r, k), + (s, 1), ], ) object_fifo_link(inA_fifo_names[i], memA_fifo_names[i]) @@ -221,10 +221,10 @@ def device_body(): 2, memRef_B_ty, [ - (k // s, s * n * word_size_in // 4), - (n // t, t * word_size_in // 4), - (s, n * word_size_in // 4), - (t * word_size_in // 4, 1), + (k // s, s * n), + (n // t, t), + (s, n), + (t, 1), ], ) object_fifo_link(inB_fifo_names[i], memB_fifo_names[i]) @@ 
-246,10 +246,10 @@ def device_body(): 2, memRef_outC_ty, [ - (m // r, r * n * word_size_out // 4), - (r, t * word_size_out // 4), - (n // t, r * t * word_size_out // 4), - (t * word_size_out // 4, 1), + (m // r, r * n), + (r, t), + (n // t, r * t), + (t, 1), ], ) object_fifo_link(memC_fifo_names[i], outC_fifo_names[i]) diff --git a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py index 5838dc598b..ed3eab17ab 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py @@ -102,10 +102,10 @@ def device_body(): 2, memRef_A_ty, [ - (m // r, r * k * word_size_in // 4), - (k // s, s * word_size_in // 4), - (r, k * word_size_in // 4), - (s * word_size_in // 4, 1), + (m // r, r * k), + (k // s, s), + (r, k), + (s, 1), ], ) object_fifo_link(inA, inA_fifo_names[0:n_cores]) @@ -119,10 +119,10 @@ def device_body(): 2, memRef_B_ty, [ - (k // s, s * n * word_size_in // 4), - (n // t, t * word_size_in // 4), - (s, n * word_size_in // 4), - (t * word_size_in // 4, 1), + (k // s, s * n), + (n // t, t), + (s, n), + (t, 1), ], ) object_fifo_link(inB, [inB_fifo_names[0]]) @@ -139,10 +139,10 @@ def device_body(): 2, memRef_outC_ty, [ - (m // r, r * n * word_size_out // 4), - (r, t * word_size_out // 4), - (n // t, r * t * word_size_out // 4), - (t * word_size_out // 4, 1), + (m // r, r * n), + (r, t), + (n // t, r * t), + (t, 1), ], ) object_fifo_link(outC_fifo_names[0:n_cores], outC) diff --git a/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py b/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py index fb4a8e97e8..80b5c89613 100644 --- a/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py @@ -103,9 +103,8 @@ def device_body(): 2, memRef_A_ty, [ - (k_in_i32s, 1), - (m, k_in_i32s), - (1, 1), + (m, k), + (k, 1), ], ) 
object_fifo_link( diff --git a/test/Passes/assign-bd-ids/bad_bd_assignments.mlir b/test/Passes/assign-bd-ids/bad_bd_assignments.mlir new file mode 100644 index 0000000000..71ab96951f --- /dev/null +++ b/test/Passes/assign-bd-ids/bad_bd_assignments.mlir @@ -0,0 +1,128 @@ +//===- bad_bd_assignments.mlir ---------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2022 Xilinx Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --verify-diagnostics --split-input-file %s + +module { + aie.device(ipu) { + %tile_0_2 = aie.tile(0, 2) + %double_buffer = aie.buffer(%tile_0_2) : memref<32xi32> + %lock_Y = aie.lock(%tile_0_2) {init = 0 : i32} + %mem_0_2 = aie.mem(%tile_0_2) { + %player_a = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_Y, Acquire, 0) + // expected-error@+1 {{'aie.dma_bd' op bdId attribute exceeds max: 15}} + aie.dma_bd(%double_buffer : memref<32xi32>, 0) {bd_id = 16 : i32, next_bd_id = 1 : i32} + aie.use_lock(%lock_Y, Release, 0) + }] + aie.end + } + } +} + +// ----- + +module { + aie.device(ipu) { + %tile_0_2 = aie.tile(0, 2) + %double_buffer = aie.buffer(%tile_0_2) : memref<32xi32> + %lock_X = aie.lock(%tile_0_2) {init = 0 : i32} + %mem_0_2 = aie.mem(%tile_0_2) { + %player_a = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_X, Acquire, 1) + // expected-error@+1 {{'aie.dma_bd' op nextBdId attribute exceeds max: 15}} + aie.dma_bd(%double_buffer : memref<32xi32>) {bd_id = 1 : i32, next_bd_id = 16 : i32} + aie.use_lock(%lock_X, Release, -1) + }] + aie.end + } + } +} + +// ----- + +module { + aie.device(ipu) { + %tile_0_1 = aie.tile(0, 1) + %buffer_0_1 = aie.buffer(%tile_0_1) : memref<32xi32> + %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { + %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} + %lock_0_1_0 =
aie.lock(%tile_0_1) {init = 0 : i32} + %0 = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_0_1, AcquireGreaterEqual) + // expected-error@+1 {{'aie.dma_bd' op bdId attribute exceeds max: 47}} + aie.dma_bd(%buffer_0_1 : memref<32xi32>) {bd_id = 48 : i32, next_bd_id = 1 : i32} + aie.use_lock(%lock_0_1_0, Release) + }] + aie.end + } + } +} + +// ----- + +module { + aie.device(ipu) { + %tile_0_1 = aie.tile(0, 1) + %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { + %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} + %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} + %buffer_0_1 = aie.buffer(%tile_0_1) : memref<32xi32> + %0 = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_0_1, AcquireGreaterEqual) + // expected-error@+1 {{'aie.dma_bd' op nextBdId attribute exceeds max: 47}} + aie.dma_bd(%buffer_0_1 : memref<32xi32>) {bd_id = 1 : i32, next_bd_id = 48 : i32} + aie.use_lock(%lock_0_1_0, Release) + }] + aie.end + } + } +} + + +// ----- + +module { + aie.device(ipu) { + %tile_0_1 = aie.tile(0, 1) + %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { + %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} + %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} + %buffer_0_1 = aie.buffer(%tile_0_1) : memref<32xi32> + %0 = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_0_1, AcquireGreaterEqual) + // expected-error@+1 {{'aie.dma_bd' op nextBdId attribute exceeds max: 47}} + aie.dma_bd(%buffer_0_1 : memref<32xi32>) {bd_id = 1 : i32, next_bd_id = 48 : i32} + aie.use_lock(%lock_0_1_0, Release) + }] + aie.end + } + } +} + +// ----- + +module { + aie.device(ipu) { + %tile_0_1 = aie.tile(0, 1) + %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { + %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} + %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} + %buffer_0_1 = aie.buffer(%tile_0_1) : memref<128xi16> + %0 = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_0_1, AcquireGreaterEqual) + // expected-error@+1 {{'aie.dma_bd' op transfer length must be multiple of 4 (i.e., represent 4 byte aligned address)}} + aie.dma_bd(%buffer_0_1 
: memref<128xi16>, 0, 129) + aie.use_lock(%lock_0_1_0, Release) + }] + aie.end + } + } +} \ No newline at end of file diff --git a/test/Targets/AIEGenerateXAIE/aie2_nd_DMA.mlir b/test/Targets/AIEGenerateXAIE/aie2_nd_DMA.mlir index 15e64b3ab5..5893b87bb0 100644 --- a/test/Targets/AIEGenerateXAIE/aie2_nd_DMA.mlir +++ b/test/Targets/AIEGenerateXAIE/aie2_nd_DMA.mlir @@ -15,11 +15,11 @@ // CHECK: if(NULL == dma_tile_2_1_bd_0_tensor.Dim){ // CHECK: return 1; // CHECK: } -// CHECK: dma_tile_2_1_bd_0_tensor.Dim[3].AieMlDimDesc = { /* StepSize */ 1, /* Size */ 2}; -// CHECK: dma_tile_2_1_bd_0_tensor.Dim[2].AieMlDimDesc = { /* StepSize */ 2, /* Size */ 3}; -// CHECK: dma_tile_2_1_bd_0_tensor.Dim[1].AieMlDimDesc = { /* StepSize */ 4, /* Size */ 2}; -// CHECK: dma_tile_2_1_bd_0_tensor.Dim[0].AieMlDimDesc = { /* StepSize */ 1, /* Size */ 1}; -// CHECK: __mlir_aie_try(XAie_DmaSetMultiDimAddr(&(dma_tile21_bd0), &dma_tile_2_1_bd_0_tensor, 0x82000, /* len */ 128 * 4)); +// CHECK: dma_tile_2_1_bd_0_tensor.Dim[3].AieMlDimDesc = { /* Stride */ 1, /* Size */ 2}; +// CHECK: dma_tile_2_1_bd_0_tensor.Dim[2].AieMlDimDesc = { /* Stride */ 2, /* Size */ 3}; +// CHECK: dma_tile_2_1_bd_0_tensor.Dim[1].AieMlDimDesc = { /* Stride */ 4, /* Size */ 2}; +// CHECK: dma_tile_2_1_bd_0_tensor.Dim[0].AieMlDimDesc = { /* Stride */ 1, /* Size */ 1}; +// CHECK: __mlir_aie_try(XAie_DmaSetMultiDimAddr(&(dma_tile21_bd0), &dma_tile_2_1_bd_0_tensor, 0x82000, /* len */ 512)); module @aie_module { aie.device(xcve2302) { diff --git a/test/Targets/AIEGenerateXAIE/aie2_tileDMA.mlir b/test/Targets/AIEGenerateXAIE/aie2_tileDMA.mlir index d686d76159..600ae4121a 100644 --- a/test/Targets/AIEGenerateXAIE/aie2_tileDMA.mlir +++ b/test/Targets/AIEGenerateXAIE/aie2_tileDMA.mlir @@ -13,7 +13,7 @@ // CHECK: XAie_DmaDesc [[bd0:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,3))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd0]]), XAie_LockInit(3,-1),XAie_LockInit(4,1))); -// CHECK: 
__mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 256 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 1024)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd0]]), /* nextbd */ 0, /* enableNextBd */ 0)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd0]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,3), /* bd */ 0)); diff --git a/test/Targets/AIEGenerateXAIE/aie2_tileDMA2.mlir b/test/Targets/AIEGenerateXAIE/aie2_tileDMA2.mlir index 62d61c09ae..cb5729b611 100644 --- a/test/Targets/AIEGenerateXAIE/aie2_tileDMA2.mlir +++ b/test/Targets/AIEGenerateXAIE/aie2_tileDMA2.mlir @@ -14,7 +14,7 @@ // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,3))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd0]]), XAie_LockInit(3,-1),XAie_LockInit({{.*}},0))); // CHECK: [[bd0]].LockDesc.LockRelEn = XAIE_DISABLE; -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 256 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 1024)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd0]]), /* nextbd */ 0, /* enableNextBd */ 0)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd0]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,3), /* bd */ 0)); diff --git a/test/Targets/AIEGenerateXAIE/aie2_tileDMA3.mlir b/test/Targets/AIEGenerateXAIE/aie2_tileDMA3.mlir index f69b28fa56..6568ba38b7 100644 --- a/test/Targets/AIEGenerateXAIE/aie2_tileDMA3.mlir +++ b/test/Targets/AIEGenerateXAIE/aie2_tileDMA3.mlir @@ -14,7 +14,7 @@ // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,3))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd0]]), XAie_LockInit({{.*}},0),XAie_LockInit(4,1))); // CHECK: [[bd0]].LockDesc.LockAcqEn = XAIE_DISABLE; -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 
256 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 1024)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd0]]), /* nextbd */ 0, /* enableNextBd */ 0)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd0]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,3), /* bd */ 0)); diff --git a/test/Targets/AIEGenerateXAIE/aie2_tileDMA4.mlir b/test/Targets/AIEGenerateXAIE/aie2_tileDMA4.mlir index e65c86fbba..97db3f5ed4 100644 --- a/test/Targets/AIEGenerateXAIE/aie2_tileDMA4.mlir +++ b/test/Targets/AIEGenerateXAIE/aie2_tileDMA4.mlir @@ -13,7 +13,7 @@ // CHECK: XAie_DmaDesc [[bd0:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,3))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd0]]), XAie_LockInit(3,-1),XAie_LockInit(4,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 256 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 1024)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd0]]), /* nextbd */ 0, /* enableNextBd */ 0)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd0]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,3), /* bd */ 0)); diff --git a/test/Targets/AIEGenerateXAIE/aie2_tileDMA_locks.mlir b/test/Targets/AIEGenerateXAIE/aie2_tileDMA_locks.mlir index 46de9611b1..88cfe56e4f 100644 --- a/test/Targets/AIEGenerateXAIE/aie2_tileDMA_locks.mlir +++ b/test/Targets/AIEGenerateXAIE/aie2_tileDMA_locks.mlir @@ -13,7 +13,7 @@ // CHECK: XAie_DmaDesc [[bd0:.*]]; // CHECK: XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,4)) // CHECK: XAie_DmaSetLock(&([[bd0]]), XAie_LockInit(3,-1),XAie_LockInit(4,1)) -// CHECK: XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 256 * 4) +// CHECK: XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x720, /* len */ 1024) // CHECK: XAie_DmaSetNextBd(&([[bd0]]), /* nextbd */ 1, /* 
enableNextBd */ 1) // CHECK: XAie_DmaEnableBd(&([[bd0]])) // CHECK: XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(7,4), /* bd */ 0) diff --git a/test/Targets/AIEGenerateXAIE/memTileDMA.mlir b/test/Targets/AIEGenerateXAIE/memTileDMA.mlir index 3ef0df299d..6cb7a5c45f 100644 --- a/test/Targets/AIEGenerateXAIE/memTileDMA.mlir +++ b/test/Targets/AIEGenerateXAIE/memTileDMA.mlir @@ -14,25 +14,25 @@ // CHECK: XAie_DmaDesc [[bd0:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(2,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x82000, /* len */ 16 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x82000, /* len */ 64)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd0]]), {{.*}} 0, {{.*}} 1)); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(2,1), {{.*}} 0)); // CHECK: XAie_DmaDesc [[bd24:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd24]]), XAie_TileLoc(2,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd24]]), /* addrA */ 0x82000, /* len */ 16 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd24]]), /* addrA */ 0x82000, /* len */ 64)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd24]]), {{.*}} 24, {{.*}} 1)); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd24]]), XAie_TileLoc(2,1), {{.*}} 24)); // CHECK: XAie_DmaDesc [[bd25:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd25]]), XAie_TileLoc(2,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd25]]), /* addrA */ 0x80720, /* len */ 16 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd25]]), /* addrA */ 0x80720, /* len */ 64)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd25]]), {{.*}} 25, {{.*}} 1)); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd25]]), XAie_TileLoc(2,1), {{.*}} 25)); // CHECK: XAie_DmaDesc [[bd1:.*]]; // CHECK: 
__mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(2,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd1]]), /* addrA */ 0x80720, /* len */ 16 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd1]]), /* addrA */ 0x80720, /* len */ 64)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd1]]), {{.*}} 1, {{.*}} 1)); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(2,1), {{.*}} 1)); diff --git a/test/Targets/AIEGenerateXAIE/memTileDMA2.mlir b/test/Targets/AIEGenerateXAIE/memTileDMA2.mlir index aff0040436..fbe6d1a08e 100644 --- a/test/Targets/AIEGenerateXAIE/memTileDMA2.mlir +++ b/test/Targets/AIEGenerateXAIE/memTileDMA2.mlir @@ -14,7 +14,7 @@ // CHECK: XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(2,1)) // CHECK: XAie_DmaSetLock(&([[bd0]]), XAie_LockInit(0,0),XAie_LockInit(0,1)) // CHECK: [[bd0]].LockDesc.LockAcqEn = XAIE_DISABLE; -// CHECK: XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x0, /* len */ 16 * 4) +// CHECK: XAie_DmaSetAddrLen(&([[bd0]]), /* addrA */ 0x0, /* len */ 64) // CHECK: XAie_DmaSetNextBd(&([[bd0]]), /* nextbd */ 1, /* enableNextBd */ 1) // CHECK: XAie_DmaEnableBd(&([[bd0]])) // CHECK: XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(2,1), /* bd */ 0) @@ -23,7 +23,7 @@ // CHECK: XAie_DmaDescInit(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(2,1)) // CHECK: XAie_DmaSetLock(&([[bd1]]), XAie_LockInit(0,0),XAie_LockInit(64,1)) // CHECK: [[bd1]].LockDesc.LockAcqEn = XAIE_DISABLE; -// CHECK: XAie_DmaSetAddrLen(&([[bd1]]), /* addrA */ 0x80000, /* len */ 16 * 4) +// CHECK: XAie_DmaSetAddrLen(&([[bd1]]), /* addrA */ 0x80000, /* len */ 64) // CHECK: XAie_DmaSetNextBd(&([[bd1]]), /* nextbd */ 2, /* enableNextBd */ 1) // CHECK: XAie_DmaEnableBd(&([[bd1]])) // CHECK: XAie_DmaWriteBd(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(2,1), /* bd */ 1) @@ -32,7 +32,7 @@ // CHECK: XAie_DmaDescInit(&(ctx->DevInst), &([[bd2]]), XAie_TileLoc(2,1)) // CHECK: XAie_DmaSetLock(&([[bd2]]), 
XAie_LockInit(0,0),XAie_LockInit(128,1)) // CHECK: [[bd2]].LockDesc.LockAcqEn = XAIE_DISABLE; -// CHECK: XAie_DmaSetAddrLen(&([[bd2]]), /* addrA */ 0x100000, /* len */ 16 * 4) +// CHECK: XAie_DmaSetAddrLen(&([[bd2]]), /* addrA */ 0x100000, /* len */ 64) // CHECK: XAie_DmaSetNextBd(&([[bd2]]), /* nextbd */ 0, /* enableNextBd */ 0) // CHECK: XAie_DmaEnableBd(&([[bd2]])) // CHECK: XAie_DmaWriteBd(&(ctx->DevInst), &([[bd2]]), XAie_TileLoc(2,1), /* bd */ 2) diff --git a/test/Targets/AIEGenerateXAIE/shim.mlir b/test/Targets/AIEGenerateXAIE/shim.mlir index f9cf7ec751..df53be3f53 100644 --- a/test/Targets/AIEGenerateXAIE/shim.mlir +++ b/test/Targets/AIEGenerateXAIE/shim.mlir @@ -14,14 +14,14 @@ // CHECK: XAie_DmaDesc [[bd0:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(2,0))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd0]]), XAie_LockInit(0,0),XAie_LockInit(0,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), {{.*}} mlir_aie_external_get_addr_myBuffer_20_0(), {{.*}} 16 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), {{.*}} mlir_aie_external_get_addr_myBuffer_20_0(), {{.*}} 64)); // CHECK: __mlir_aie_try(XAie_DmaSetAxi(&([[bd0]]), {{.*}} 0, {{.*}} 4, {{.*}} 0, {{.*}} 0, {{.*}} XAIE_ENABLE)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd0]]), {{.*}} 0, {{.*}} 1)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd0]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(2,0), {{.*}} 0)); // CHECK: XAie_DmaDesc [[bd1:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(2,0))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd1]]), {{.*}} mlir_aie_external_get_addr_myBuffer_20_1(), {{.*}} 4 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd1]]), {{.*}} mlir_aie_external_get_addr_myBuffer_20_1(), {{.*}} 16)); // CHECK: __mlir_aie_try(XAie_DmaSetAxi(&([[bd1]]), {{.*}} 0, {{.*}} 4, {{.*}} 0, {{.*}} 0, {{.*}} XAIE_ENABLE)); // 
CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd1]]), {{.*}} 1, {{.*}} 1)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd1]]))); diff --git a/test/Targets/AIEGenerateXAIE/shim_dma_packet.mlir b/test/Targets/AIEGenerateXAIE/shim_dma_packet.mlir index 2ccf0c2a1d..0d59fda357 100644 --- a/test/Targets/AIEGenerateXAIE/shim_dma_packet.mlir +++ b/test/Targets/AIEGenerateXAIE/shim_dma_packet.mlir @@ -14,7 +14,7 @@ // CHECK: XAie_DmaDesc dma_tile70_bd0; // CHECK: XAie_DmaDescInit(&(ctx->DevInst), &(dma_tile70_bd0), XAie_TileLoc(7,0)) // CHECK: XAie_DmaSetLock(&(dma_tile70_bd0), XAie_LockInit(0,1),XAie_LockInit(0,0)) -// CHECK: XAie_DmaSetAddrLen(&(dma_tile70_bd0), /* addrA */ mlir_aie_external_get_addr_myBuffer_70_0(), /* len */ 1024 * 4) +// CHECK: XAie_DmaSetAddrLen(&(dma_tile70_bd0), /* addrA */ mlir_aie_external_get_addr_myBuffer_70_0(), /* len */ 4096) // CHECK: XAie_DmaSetAxi(&(dma_tile70_bd0), /* smid */ 0, /* burstlen */ 4, /* QoS */ 0, /* Cache */ 0, /* Secure */ XAIE_ENABLE) // CHECK: XAie_DmaSetNextBd(&(dma_tile70_bd0), /* nextbd */ 0, /* enableNextBd */ 1) // CHECK: XAie_DmaSetPkt(&(dma_tile70_bd0), XAie_PacketInit(2,0)) diff --git a/test/Targets/AIEGenerateXAIE/test_xaie1.mlir b/test/Targets/AIEGenerateXAIE/test_xaie1.mlir index 92190151ee..7e587d3376 100644 --- a/test/Targets/AIEGenerateXAIE/test_xaie1.mlir +++ b/test/Targets/AIEGenerateXAIE/test_xaie1.mlir @@ -13,7 +13,7 @@ // CHECK: XAie_DmaDesc dma_tile33_bd0; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &(dma_tile33_bd0), XAie_TileLoc(3,3))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&(dma_tile33_bd0), XAie_LockInit(0,0),XAie_LockInit(0,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&(dma_tile33_bd0), {{.*}} 0x1400, {{.*}} 256 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&(dma_tile33_bd0), {{.*}} 0x1400, {{.*}} 1024)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&(dma_tile33_bd0), {{.*}} 0, {{.*}} 0)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&(dma_tile33_bd0))); // CHECK: 
__mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &(dma_tile33_bd0), XAie_TileLoc(3,3), {{.*}} 0)); diff --git a/test/Targets/AIEGenerateXAIE/test_xaie2.mlir b/test/Targets/AIEGenerateXAIE/test_xaie2.mlir index c6f4126224..500e0fb91f 100644 --- a/test/Targets/AIEGenerateXAIE/test_xaie2.mlir +++ b/test/Targets/AIEGenerateXAIE/test_xaie2.mlir @@ -14,14 +14,14 @@ // CHECK: XAie_DmaDesc [[bd0:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(3,3))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd0]]), XAie_LockInit(0,0),XAie_LockInit(0,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), {{.*}}0x1000, {{.*}}256 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), {{.*}}0x1000, {{.*}}1024)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd0]]), {{.*}}1, {{.*}}1)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd0]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(3,3), {{.*}}0)); // CHECK: XAie_DmaDesc [[bd1:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(3,3))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd1]]), XAie_LockInit(0,0),XAie_LockInit(0,1))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd1]]), {{.*}}0x1400, {{.*}}4 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd1]]), {{.*}}0x1400, {{.*}}16)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd1]]), {{.*}}0, {{.*}}1)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd1]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(3,3), {{.*}}1)); diff --git a/test/Targets/AIEGenerateXAIE/test_xaie4.mlir b/test/Targets/AIEGenerateXAIE/test_xaie4.mlir index 5f9e5cd8b4..2b1a2d8af0 100644 --- a/test/Targets/AIEGenerateXAIE/test_xaie4.mlir +++ b/test/Targets/AIEGenerateXAIE/test_xaie4.mlir @@ -14,14 +14,14 @@ // CHECK: XAie_DmaDesc [[bd0:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(3,3))); // 
CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd0]]), XAie_LockInit(0,1),XAie_LockInit(0,0))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), {{.*}}0x1000, {{.*}}256 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd0]]), {{.*}}0x1000, {{.*}}1024)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd0]]), {{.*}}0, {{.*}}0)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd0]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd0]]), XAie_TileLoc(3,3), {{.*}}0)); // CHECK: XAie_DmaDesc [[bd1:.*]]; // CHECK: __mlir_aie_try(XAie_DmaDescInit(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(3,3))); // CHECK: __mlir_aie_try(XAie_DmaSetLock(&([[bd1]]), XAie_LockInit(1,1),XAie_LockInit(1,0))); -// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd1]]), {{.*}}0x1400, {{.*}}256 * 4)); +// CHECK: __mlir_aie_try(XAie_DmaSetAddrLen(&([[bd1]]), {{.*}}0x1400, {{.*}}1024)); // CHECK: __mlir_aie_try(XAie_DmaSetNextBd(&([[bd1]]), {{.*}}0, {{.*}}0)); // CHECK: __mlir_aie_try(XAie_DmaEnableBd(&([[bd1]]))); // CHECK: __mlir_aie_try(XAie_DmaWriteBd(&(ctx->DevInst), &([[bd1]]), XAie_TileLoc(3,3), {{.*}}1)); diff --git a/test/assign-buffer-addresses/bad_alignment.mlir b/test/assign-buffer-addresses/bad_alignment.mlir new file mode 100644 index 0000000000..b9c2b83d7e --- /dev/null +++ b/test/assign-buffer-addresses/bad_alignment.mlir @@ -0,0 +1,95 @@ +//===- bad_alignment.mlir --------------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2022 Xilinx Inc. 
+// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --verify-diagnostics --split-input-file %s + +module { + aie.device(ipu) { + %tile_0_1 = aie.tile(0, 1) + %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { + %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} + %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} + %buffer_0_1 = aie.buffer(%tile_0_1) {address = 1 : i32} : memref<128xi16> + %0 = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_0_1, AcquireGreaterEqual) + // expected-error@+1 {{'aie.dma_bd' op bd address must be 4 byte (32b) aligned; got base+offset: 1 (bytes)}} + aie.dma_bd(%buffer_0_1 : memref<128xi16>, 0, 128) + aie.use_lock(%lock_0_1_0, Release) + }] + aie.end + } + } +} + +// ----- + +module { + aie.device(ipu) { + %tile_0_1 = aie.tile(0, 1) + %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { + %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} + %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} + %buffer_0_1 = aie.buffer(%tile_0_1) {address = 1 : i32} : memref<128xi16> + %0 = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_0_1, AcquireGreaterEqual) + aie.dma_bd(%buffer_0_1 : memref<128xi16>, 3, 128) + // expected-error@above {{'aie.dma_bd' op bd address must be 4 byte (32b) aligned; got base+offset: 7 (bytes)}} + aie.use_lock(%lock_0_1_0, Release) + }] + aie.end + } + } +} + + +// ----- + +// Technically this should be in a "positive test" but it makes more sense here +// the "expected-above" in the previous test and the "expected-below" in the following test +// prevent false-positives/false-negatives (I think). + +module { + aie.device(ipu) { + %tile_0_1 = aie.tile(0, 1) + %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { + %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} + %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} + %buffer_0_1 = aie.buffer(%tile_0_1) {address = 2 : i32} : memref<128xi16> + %0 = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_0_1, AcquireGreaterEqual) + // 2*3 + 2 = 8 bytes i.e., 4B aligned...
+ aie.dma_bd(%buffer_0_1 : memref<128xi16>, 3, 128) + aie.use_lock(%lock_0_1_0, Release) + }] + aie.end + } + } +} + + +// ----- + +module { + aie.device(ipu) { + %tile_0_1 = aie.tile(0, 1) + %memtile_dma_0_1 = aie.memtile_dma(%tile_0_1) { + %lock_0_1 = aie.lock(%tile_0_1) {init = 1 : i32} + %lock_0_1_0 = aie.lock(%tile_0_1) {init = 0 : i32} + %buffer_0_1 = aie.buffer(%tile_0_1) {address = 0 : i32} : memref<128xi16> + %0 = aie.dma(S2MM, 0) [{ + aie.use_lock(%lock_0_1, AcquireGreaterEqual) + // expected-error@below {{'aie.dma_bd' op bd address must be 4 byte (32b) aligned; got base+offset: 6 (bytes)}} + aie.dma_bd(%buffer_0_1 : memref<128xi16>, 3, 128) + aie.use_lock(%lock_0_1_0, Release) + }] + aie.end + } + } +} diff --git a/test/dialect/AIE/badtiledma2.mlir b/test/dialect/AIE/badtiledma2.mlir index ac37222545..d15d1d7879 100644 --- a/test/dialect/AIE/badtiledma2.mlir +++ b/test/dialect/AIE/badtiledma2.mlir @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// // RUN: not %PYTHON aiecc.py %s 2>&1 | FileCheck %s -// CHECK: error{{.*}}'aie.dma_bd' op can only access a buffer in the same tile. +// CHECK: error{{.*}}'aie.dma_bd' op Core tile DMAs can only access a buffer in the same tile. module @test { %t63 = aie.tile(6, 3) diff --git a/test/dialect/AIE/nd-dma-bad-stride.mlir b/test/dialect/AIE/nd-dma-bad-stride.mlir new file mode 100644 index 0000000000..7848a7c268 --- /dev/null +++ b/test/dialect/AIE/nd-dma-bad-stride.mlir @@ -0,0 +1,31 @@ +//===- nd-dma-bad-stride.mlir ----------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2023, Advanced Micro Devices, Inc.
+// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --verify-diagnostics %s + +module @tutorial_2b { + aie.device(xcve2802) { + %tile14 = aie.tile(1, 4) + %tile34 = aie.tile(3, 4) + + aie.flow(%tile14, DMA : 0, %tile34, DMA : 0) + %buf14 = aie.buffer(%tile14) : memref<128xi16> + %lock14_done = aie.lock(%tile14, 0) { init = 0 : i32 } + %mem14 = aie.mem(%tile14) { + %srcDma = aie.dma_start("MM2S", 0, ^bd0, ^end) + ^bd0: + // expected-error@+1 {{'aie.dma_bd' op For <32b width datatypes, inner-most dim stride must be 1}} + aie.dma_bd(%buf14 : memref<128xi16>, 0, 128, []) + aie.next_bd ^end + ^end: + aie.end + } + } +} \ No newline at end of file diff --git a/test/ipu-xrt/matrix_multiplication_using_dma/aie.mlir b/test/ipu-xrt/matrix_multiplication_using_dma/aie.mlir index d54eaa445c..01594e64cf 100644 --- a/test/ipu-xrt/matrix_multiplication_using_dma/aie.mlir +++ b/test/ipu-xrt/matrix_multiplication_using_dma/aie.mlir @@ -146,12 +146,12 @@ module { %1 = aie.dma_start(MM2S, 0, ^bb4, ^bb6) ^bb4: // 2 preds: ^bb3, ^bb5 aie.use_lock(%inA_cons_cons_lock, AcquireGreaterEqual) - aie.dma_bd(%inA_cons_buff_0 : memref<64x32xi16>, 0, 2048, [, , , ]) + aie.dma_bd(%inA_cons_buff_0 : memref<64x32xi16>, 0, 2048, [, , , ]) aie.use_lock(%inA_cons_prod_lock, Release) aie.next_bd ^bb5 ^bb5: // pred: ^bb4 aie.use_lock(%inA_cons_cons_lock, AcquireGreaterEqual) - aie.dma_bd(%inA_cons_buff_1 : memref<64x32xi16>, 0, 2048, [, , , ]) + aie.dma_bd(%inA_cons_buff_1 : memref<64x32xi16>, 0, 2048, [, , , ]) aie.use_lock(%inA_cons_prod_lock, Release) aie.next_bd ^bb4 ^bb6: // pred: ^bb3 @@ -170,12 +170,12 @@ module { %3 = aie.dma_start(MM2S, 1, ^bb10, ^bb12) ^bb10: // 2 preds: ^bb9, ^bb11 aie.use_lock(%inB_cons_cons_lock, AcquireGreaterEqual) - aie.dma_bd(%inB_cons_buff_0 : memref<32x64xi16>, 0, 2048, [, , , ]) + aie.dma_bd(%inB_cons_buff_0 : memref<32x64xi16>, 0, 2048, [, , , ]) aie.use_lock(%inB_cons_prod_lock, Release) aie.next_bd ^bb11 ^bb11: 
// pred: ^bb10 aie.use_lock(%inB_cons_cons_lock, AcquireGreaterEqual) - aie.dma_bd(%inB_cons_buff_1 : memref<32x64xi16>, 0, 2048, [, , , ]) + aie.dma_bd(%inB_cons_buff_1 : memref<32x64xi16>, 0, 2048, [, , , ]) aie.use_lock(%inB_cons_prod_lock, Release) aie.next_bd ^bb10 ^bb12: // pred: ^bb9 @@ -194,12 +194,12 @@ module { %5 = aie.dma_start(MM2S, 2, ^bb16, ^bb18) ^bb16: // 2 preds: ^bb15, ^bb17 aie.use_lock(%memC_cons_cons_lock, AcquireGreaterEqual) - aie.dma_bd(%memC_cons_buff_0 : memref<64x64xi16>, 0, 4096, [, , , ]) + aie.dma_bd(%memC_cons_buff_0 : memref<64x64xi16>, 0, 4096, [, , , ]) aie.use_lock(%memC_cons_prod_lock, Release) aie.next_bd ^bb17 ^bb17: // pred: ^bb16 aie.use_lock(%memC_cons_cons_lock, AcquireGreaterEqual) - aie.dma_bd(%memC_cons_buff_1 : memref<64x64xi16>, 0, 4096, [, , , ]) + aie.dma_bd(%memC_cons_buff_1 : memref<64x64xi16>, 0, 4096, [, , , ]) aie.use_lock(%memC_cons_prod_lock, Release) aie.next_bd ^bb16 ^bb18: // pred: ^bb15 diff --git a/test/objectFifo-stateful-transform/link_test_distribute.mlir b/test/objectFifo-stateful-transform/link_test_distribute.mlir index c95d8b6e10..7deda5fda2 100644 --- a/test/objectFifo-stateful-transform/link_test_distribute.mlir +++ b/test/objectFifo-stateful-transform/link_test_distribute.mlir @@ -88,24 +88,24 @@ // CHECK: %[[VAL_29:.*]] = aie.dma_start(MM2S, 1, ^bb7, ^bb9) // CHECK: ^bb7: // 2 preds: ^bb6, ^bb8 // CHECK: aie.use_lock(%[[VAL_20]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_17]] : memref<48xi32>, 64, 20) +// CHECK: aie.dma_bd(%[[VAL_17]] : memref<48xi32>, 16, 20) // CHECK: aie.use_lock(%[[VAL_19]], Release, 1) // CHECK: aie.next_bd ^bb8 // CHECK: ^bb8: // pred: ^bb7 // CHECK: aie.use_lock(%[[VAL_20]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_18]] : memref<48xi32>, 64, 20) +// CHECK: aie.dma_bd(%[[VAL_18]] : memref<48xi32>, 16, 20) // CHECK: aie.use_lock(%[[VAL_19]], Release, 1) // CHECK: aie.next_bd ^bb7 // CHECK: ^bb9: // pred: ^bb6 // CHECK: %[[VAL_30:.*]] = 
aie.dma_start(MM2S, 2, ^bb10, ^bb12) // CHECK: ^bb10: // 2 preds: ^bb9, ^bb11 // CHECK: aie.use_lock(%[[VAL_20]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_17]] : memref<48xi32>, 144, 12) +// CHECK: aie.dma_bd(%[[VAL_17]] : memref<48xi32>, 36, 12) // CHECK: aie.use_lock(%[[VAL_19]], Release, 1) // CHECK: aie.next_bd ^bb11 // CHECK: ^bb11: // pred: ^bb10 // CHECK: aie.use_lock(%[[VAL_20]], AcquireGreaterEqual, 1) -// CHECK: aie.dma_bd(%[[VAL_18]] : memref<48xi32>, 144, 12) +// CHECK: aie.dma_bd(%[[VAL_18]] : memref<48xi32>, 36, 12) // CHECK: aie.use_lock(%[[VAL_19]], Release, 1) // CHECK: aie.next_bd ^bb10 // CHECK: ^bb12: // pred: ^bb9