From cb25faec84940db986e11443f644956ca975a23f Mon Sep 17 00:00:00 2001 From: Maksim Levental Date: Tue, 26 Sep 2023 02:40:18 -0400 Subject: [PATCH] Revert "catch up to TOM MLIR (#590)" (#656) This reverts commit 47ff7d38a89372c02e22515e61a696f2a3e93013. --- include/aie/AIETokenAnalysis.h | 2 +- include/aie/Dialect/ADF/ADF.td | 1 + include/aie/Dialect/AIE/AIENetlistAnalysis.h | 2 +- include/aie/Dialect/AIE/IR/AIE.td | 91 ++++++++++--------- include/aie/Dialect/AIE/IR/AIEInterfaces.td | 12 +-- include/aie/Dialect/AIE/IR/AIETargetModel.h | 25 ++--- .../Dialect/AIE/Transforms/AIEFindFlows.td | 6 +- .../Transforms/AIENormalizeAddressSpaces.td | 10 +- include/aie/Dialect/AIEVec/AIEVecUtils.h | 11 ++- .../aie/Dialect/AIEVec/IR/AIEVecDialect.td | 1 + include/aie/Dialect/AIEVec/IR/AIEVecOps.h | 4 +- .../aie/Dialect/AIEVec/Transforms/Passes.h | 6 +- include/aie/Dialect/AIEX/AIETokenAnalysis.h | 2 +- include/aie/Dialect/AIEX/IR/AIEX.td | 1 + lib/Conversion/PassDetail.h | 2 +- lib/Dialect/AIE/IR/AIEDialect.cpp | 8 ++ lib/Dialect/AIE/IR/AIETargetModel.cpp | 32 +++---- lib/Dialect/AIE/Transforms/AIEFindFlows.cpp | 2 +- .../Transforms/AIENormalizeAddressSpaces.cpp | 2 +- .../AIEObjectFifoRegisterProcess.cpp | 9 +- .../AIEObjectFifoStatefulTransform.cpp | 81 ++++++++--------- lib/Dialect/AIE/Transforms/AIEVectorOpt.cpp | 4 +- lib/Dialect/AIE/Utils/AIENetlistAnalysis.cpp | 2 +- .../AIEVec/Transforms/AIEVectorize.cpp | 35 +++---- .../Transforms/ConvertVectorToAIEVec.cpp | 11 +-- .../Transforms/FoldMulAddChainToConvOp.cpp | 4 +- .../Transforms/VectorToAIEVecConversions.cpp | 8 +- .../Transforms/VectorToVectorConversions.cpp | 26 +++--- lib/Dialect/AIEVec/Utils/Utils.cpp | 4 +- lib/Targets/AIETargetXAIEV2.cpp | 4 +- lib/Targets/AIETargets.cpp | 6 +- python/aie/compiler/aiecc/main.py | 4 +- python/aie/dialects/AieBinding.td | 1 + .../AIEVecToLLVM/test-upd_large.mlir | 1 - .../AIEVecToLLVM/test-upd_small.mlir | 1 - test/aievec/test_linalg_conv2d.mlir | 2 +- tools/aie-opt/aie-opt.cpp | 5 +- utils/clone-llvm.sh | 2 +- 38 files changed, 213 insertions(+), 217 deletions(-) diff --git a/include/aie/AIETokenAnalysis.h b/include/aie/AIETokenAnalysis.h index e4ef3c3d74..c22b57f9d5 100644 --- a/include/aie/AIETokenAnalysis.h +++ b/include/aie/AIETokenAnalysis.h @@ -15,11 +15,11 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" +#include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/TypeSupport.h" #include "mlir/IR/Types.h" -#include "mlir/Interfaces/FunctionImplementation.h" #include "llvm/ADT/StringSwitch.h" #include diff --git a/include/aie/Dialect/ADF/ADF.td b/include/aie/Dialect/ADF/ADF.td index c4406c357d..a91ce08054 100644 --- a/include/aie/Dialect/ADF/ADF.td +++ b/include/aie/Dialect/ADF/ADF.td @@ -33,6 +33,7 @@ def ADF_Dialect : Dialect { }]; let cppNamespace = "::xilinx::ADF"; let useDefaultTypePrinterParser = 1; + let useFoldAPI = kEmitFoldAdaptorFolder; } //===----------------------------------------------------------------------===// diff --git a/include/aie/Dialect/AIE/AIENetlistAnalysis.h b/include/aie/Dialect/AIE/AIENetlistAnalysis.h index 134ef1c712..cc958d9212 100644 --- a/include/aie/Dialect/AIE/AIENetlistAnalysis.h +++ b/include/aie/Dialect/AIE/AIENetlistAnalysis.h @@ -17,11 +17,11 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" +#include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/TypeSupport.h" #include "mlir/IR/Types.h" -#include "mlir/Interfaces/FunctionImplementation.h" #include "llvm/ADT/StringSwitch.h" #include diff --git a/include/aie/Dialect/AIE/IR/AIE.td b/include/aie/Dialect/AIE/IR/AIE.td index b1eef778cf..bd31f20e3f 100644 --- a/include/aie/Dialect/AIE/IR/AIE.td +++ b/include/aie/Dialect/AIE/IR/AIE.td @@ -8,16 +8,22 @@ // //===----------------------------------------------------------------------===// -#ifndef AIE_OPS -#define AIE_OPS - +#ifdef OP_BASE +#else include "mlir/IR/OpBase.td" +#endif // OP_BASE + include "mlir/IR/AttrTypeBase.td" include "mlir/IR/EnumAttr.td" + +#ifdef AIE_OPS +#else +#define AIE_OPS +#endif + include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/CallInterfaces.td" include "mlir/Interfaces/SideEffectInterfaces.td" - include "aie/Dialect/AIE/IR/AIEInterfaces.td" def AIE_Dialect : Dialect { @@ -35,6 +41,7 @@ switch is referred to as `switchbox` to avoid confusion with the }]; let useDefaultTypePrinterParser = 1; let useDefaultAttributePrinterParser = 1; + let useFoldAPI = kEmitFoldAdaptorFolder; } @@ -792,7 +799,7 @@ def AIE_DMABDPACKETOp: AIE_Op<"dmaBdPacket", []> { def AIE_DimTupleAttr : AttrDef { let mnemonic = "DimTuple"; - let summary = + let summary = "Tuple encoding the stride and wrap of one dimension in an " "AIE2 n-dimensional buffer descriptor"; let parameters = (ins @@ -831,10 +838,10 @@ def AIE_DMABDOp: AIE_Op<"dmaBd", []> { A DMA channel in a Memory Module can process one block descriptor after another by chaining them. There are 16 block descriptors per Memory Module. They are shared by four DMA channels. - On AIE-ML devices, an optional argument can be used to specify an array of + On AIE-ML devices, an optional argument can be used to specify an array of step sizes and wraps to move data in more advanced patterns. Strides and wraps are specified as tuples ``, and up to three dimensions - can be specified (or up to four dimensions on memtiles). + can be specified (or up to four dimensions on memtiles). The first element of the array gives the _highest-dimension_ stride and wrap, the last element of the array gives the lowest-dimension. @@ -845,7 +852,7 @@ def AIE_DMABDOp: AIE_Op<"dmaBd", []> { AIE.dmaBd(<%buf : memref<128xi32>, 0, 128>, 0, [<16, 8>, <1, 2>, <2, 8>]) ``` - This corresponds to alternating between even and odd elements of the + This corresponds to alternating between even and odd elements of the buffer/stream every 8 elements, like so, equivalent to nested loops like so: ``` @@ -976,8 +983,7 @@ def AIE_MemOp: AIE_Op<"mem", [TileElement, FlowEndPoint, CallableOpInterface, Is int maxSizeInBytes() { return 32768; } // CallableOpInterface Region *getCallableRegion(); - ArrayRef getArgumentTypes() { return getOperand().getType(); } - ArrayRef getResultTypes() { return getType(); } + ArrayRef getCallableResults(); static StringRef getDefaultDialect() { return "AIE"; } }]; let builders = [ @@ -1028,8 +1034,7 @@ def AIE_MemTileDMAOp: AIE_Op<"memTileDMA", [TileElement, FlowEndPoint, CallableO TileOp getTileOp(); // CallableOpInterface Region *getCallableRegion(); - ArrayRef getArgumentTypes() { return getOperand().getType(); } - ArrayRef getResultTypes() { return getType(); } + ArrayRef getCallableResults(); static StringRef getDefaultDialect() { return "AIE"; } }]; let builders = [ @@ -1358,50 +1363,50 @@ def AIE_ShimDMAAllocationOp : AIE_Op<"shimDMAAllocation", [HasParent<"DeviceOp"> ); let results = (outs); let assemblyFormat = [{ - $sym_name `(` $channelDir `,` $channelIndex `,` $col `)` attr-dict + $sym_name `(` $channelDir `,` $channelIndex `,` $col `)` attr-dict }]; } def AIE_ObjectFifoCreateOp: AIE_Op<"objectFifo", [HasParent<"DeviceOp">, Symbol]> { let summary = "Create a circular buffer or channel between two tiles"; let description = [{ - The `aie.objectFifo` operation creates a circular buffer established between a producer and one or - more consumers, which are `aie.tile` operations. The`aie.objectFifo` instantiates the given number of - buffers (of given output type) and their locks in the Memory Module of the appropriate tile(s) after - lowering, based on tile-adjacency. These elements represent the conceptual depth of the `objectFifo` or, + The `aie.objectFifo` operation creates a circular buffer established between a producer and one or + more consumers, which are `aie.tile` operations. The`aie.objectFifo` instantiates the given number of + buffers (of given output type) and their locks in the Memory Module of the appropriate tile(s) after + lowering, based on tile-adjacency. These elements represent the conceptual depth of the `objectFifo` or, more specifically, of its object pool. - For the producer and for each consumer, a different size (i.e., element number) can be specified as an - array of integer values. This will take effect in the case of consumers placed on tiles non-adjacent to - the producer. Otherwise, the producer size will be applied. If a single size is specified, it will be + For the producer and for each consumer, a different size (i.e., element number) can be specified as an + array of integer values. This will take effect in the case of consumers placed on tiles non-adjacent to + the producer. Otherwise, the producer size will be applied. If a single size is specified, it will be applied to both producer and consumers. - This operation is then converted by the `AIEObjectFifoStatefulTransformPass` into `aie.buffers` and their associated + This operation is then converted by the `AIEObjectFifoStatefulTransformPass` into `aie.buffers` and their associated `aie.locks`. The pass also establishes Flow and DMA operations between the producer and consumer tiles if they are not adjacent. 1-to-1 tile example: ``` - AIE.objectFifo @of1 (%tile12, { %tile23 }, 4 : i32) : !AIE.objectFifo> + AIE.objectFifo @of1 (%tile12, { %tile23 }, 4 : i32) : !AIE.objectFifo> ``` This operation creates an `objectFifo` between `%tile12` and `%tile23` of 4 elements, each a buffer of 16 32-bit integers. - Note: If there are no `ObjectFifoAcquireOps` corresponding to this `objectFifo` on the cores of `%tile12` and `%tile23`, + Note: If there are no `ObjectFifoAcquireOps` corresponding to this `objectFifo` on the cores of `%tile12` and `%tile23`, then the depths of the object pools on each tile will be 4, as specified. Otherwise, the cores are scanned and the highest number of acquired elements (+1 for prefetching) will be used instead, to ensure minimal resource usage. - + 1-to-2 tiles broadcast example: ``` - AIE.objectFifo @of2 (%tile12, { %tile13, %tile23 }, 4 : i32) : !AIE.objectFifo> + AIE.objectFifo @of2 (%tile12, { %tile13, %tile23 }, 4 : i32) : !AIE.objectFifo> ``` - This operation creates an `objectFifo` between `%tile12` and tiles `%tile13`, `%tile23` of 4 elements, each a buffer of x16 + This operation creates an `objectFifo` between `%tile12` and tiles `%tile13`, `%tile23` of 4 elements, each a buffer of x16 32-bit integers. 1-to-2 tiles broadcast with explicit sizes example: ``` - AIE.objectFifo @of3 (%tile12, { %tile13, %tile23 }, [2, 3, 4]) : !AIE.objectFifo> + AIE.objectFifo @of3 (%tile12, { %tile13, %tile23 }, [2, 3, 4]) : !AIE.objectFifo> ``` - This operation creates an `objectFifo` between `%tile12`, `%tile13` and `%tile23`. The depths of the `objectFifo` object pool - at each tile are respectively 2, 3 and 4 for tiles `%tile12`, `%tile13` and `%tile23`. This overrides the depth analysis + This operation creates an `objectFifo` between `%tile12`, `%tile13` and `%tile23`. The depths of the `objectFifo` object pool + at each tile are respectively 2, 3 and 4 for tiles `%tile12`, `%tile13` and `%tile23`. This overrides the depth analysis specified in the first example. }]; @@ -1454,7 +1459,7 @@ def AIE_ObjectFifoLinkOp: AIE_Op<"objectFifo.link", [HasParent<"DeviceOp">]> { To achieve a broadcast pattern through the link tile, the output `objectFifo` should have a list of all the consumers tiles. To achieve a distribute pattern from the link tile, there should be multiple output `objectFifos` in the LinkOp. In this case, - parts will be taken out of the input `objectFifo`'s buffers based on the sizes of the output `objectFifos`, in the order they + parts will be taken out of the input `objectFifo`'s buffers based on the sizes of the output `objectFifos`, in the order they were given in the LinkOp. The join pattern is the exact inverse of the distribute one. }]; @@ -1480,7 +1485,7 @@ def AIE_ObjectFifoLinkOp: AIE_Op<"objectFifo.link", [HasParent<"DeviceOp">]> { bool isDistribute() { return getFifoOuts().size() > 1; } - std::optional getOptionalSharedTile(); + mlir::Optional getOptionalSharedTile(); }]; } @@ -1525,14 +1530,14 @@ def AIE_ObjectFifoAcquireOp: AIE_Op<"objectFifo.acquire", []> { let summary = "Acquire operation to lock and return objects of an ObjectFifo"; let description = [{ The `aie.objectFifo.acquire` operation first acquires the locks of the next given number - of objects in the `objectFifo`. The mode it acquires the locks in is chosen based on the port + of objects in the `objectFifo`. The mode it acquires the locks in is chosen based on the port (producer: acquire for write, consumer: acquire for read). Then, it returns a subview of the acquired objects which can be used to access them. - This operation is then converted by the `AIEObjectFifoStatefulTransformPass` into `aie.useLock` operations on + This operation is then converted by the `AIEObjectFifoStatefulTransformPass` into `aie.useLock` operations on the locks of the `objectFifo` objects that will be acquired. Under the hood, the operation only performs new acquires if necessary. For example, if two objects have been acquired in the past and none have yet - to be released by the same process, then performing another acquire operation on the same `objectFifo` + to be released by the same process, then performing another acquire operation on the same `objectFifo` within the same process of size two or less will not result in any new useLock operations (and for size greater than two, only (size - 2) useLock operations will be performed). @@ -1540,7 +1545,7 @@ def AIE_ObjectFifoAcquireOp: AIE_Op<"objectFifo.acquire", []> { ``` %subview = AIE.objectFifo.acquire @of1 (Consume, 2) : !AIE.objectFifoSubview> ``` - This operation acquires the locks of the next two objects in the `objectFifo` named `@of1` from its consumer + This operation acquires the locks of the next two objects in the `objectFifo` named `@of1` from its consumer port and returns a subview of the acquired objects. }]; @@ -1568,7 +1573,7 @@ def AIE_ObjectFifoReleaseOp: AIE_Op<"objectFifo.release", []> { let summary = "Release operation for object locks in an ObjectFifo"; let description = [{ The `aie.objectFifo.release` operation releases the locks of the given number of objects - in the `objectFifo`. The mode it releases the locks in is chosen based on the `port` + in the `objectFifo`. The mode it releases the locks in is chosen based on the `port` (producer: release for read, consumer: release for write). This operation is then converted by the `AIEObjectFifoStatefulTransformPass` into `aie.useLock` operations. @@ -1587,7 +1592,7 @@ def AIE_ObjectFifoReleaseOp: AIE_Op<"objectFifo.release", []> { ); let assemblyFormat = [{ - attr-dict $objFifo_name `(` $port `,` $size `)` + attr-dict $objFifo_name `(` $port `,` $size `)` }]; let hasVerifier = 1; @@ -1608,7 +1613,7 @@ def AIE_ObjectFifoSubviewAccessOp : AIE_Op<"objectFifo.subview.access", []> { %subview = AIE.objectFifo.acquire @of1 (Produce, 3) : !AIE.objectFifoSubview> %elem = AIE.objectFifo.subview.access %subview[0] : !AIE.objectFifoSubview> -> memref<16xi32> ``` - In this example, %elem is the first object of the subview. Note that this may not correspond to the first element of + In this example, %elem is the first object of the subview. Note that this may not correspond to the first element of the `objectFifo` if other acquire operations took place beforehand. }]; @@ -1635,9 +1640,9 @@ def AIE_ObjectFifoSubviewAccessOp : AIE_Op<"objectFifo.subview.access", []> { def AIE_ObjectFifoRegisterProcessOp: AIE_Op<"objectFifo.registerProcess", []> { let summary = "Operation that produces the acquire/release patterns for a process registered to an objectFifo"; let description = [{ - The `aie.registerProcess` operation allows the user to register a function to an `objectFifo` along with its + The `aie.registerProcess` operation allows the user to register a function to an `objectFifo` along with its acquire and release patterns. These patterns will be used to generate a sequence of acquires and releases - on the `objectFifo` elements. This generated sequence is often in the form of a for loop, however, in the case + on the `objectFifo` elements. This generated sequence is often in the form of a for loop, however, in the case of cyclo-static patterns only the repetition of same number accesses and releases will generate a for loop. This may result in multiple for loops of different sizes being generated. If there is no repetition, then no loops will be generated. @@ -1652,7 +1657,7 @@ def AIE_ObjectFifoRegisterProcessOp: AIE_Op<"objectFifo.registerProcess", []> { AIE.objectFifo.registerProcess @of1 (Produce, %acquirePatternProducer : tensor<4xi32>, %releasePatternProducer : tensor<4xi32>, @producer_work, %length) ``` - This operation registers function @producer_work and associated patterns to the produce end of @of1. + This operation registers function @producer_work and associated patterns to the produce end of @of1. @producer_work will be called with the subviews produced when acquiring elements from @of1 following the acquire pattern. If the input patterns are static (only one element) then the length of the produced for loop will be that of the input %length. @@ -1669,7 +1674,7 @@ def AIE_ObjectFifoRegisterProcessOp: AIE_Op<"objectFifo.registerProcess", []> { ); let assemblyFormat = [{ - attr-dict $objFifo_name `(` $port `,` $acquirePatternTensor `:` type($acquirePatternTensor) `,` $releasePatternTensor `:` type($releasePatternTensor) `,` $callee `,` $length`)` + attr-dict $objFifo_name `(` $port `,` $acquirePatternTensor `:` type($acquirePatternTensor) `,` $releasePatternTensor `:` type($releasePatternTensor) `,` $callee `,` $length`)` }]; let hasVerifier = 1; @@ -1681,5 +1686,3 @@ def AIE_ObjectFifoRegisterProcessOp: AIE_Op<"objectFifo.registerProcess", []> { int getProcessLength() { return getLength().getDefiningOp().getValue().cast().getInt(); } }]; } - -#endif // AIE_OPS \ No newline at end of file diff --git a/include/aie/Dialect/AIE/IR/AIEInterfaces.td b/include/aie/Dialect/AIE/IR/AIEInterfaces.td index 6c2f06e38d..49d6641285 100644 --- a/include/aie/Dialect/AIE/IR/AIEInterfaces.td +++ b/include/aie/Dialect/AIE/IR/AIEInterfaces.td @@ -8,11 +8,11 @@ // //===----------------------------------------------------------------------===// - -#ifndef AIE_INTERFACES -#define AIE_INTERFACES - +#ifdef OP_BASE +#else include "mlir/IR/OpBase.td" +#endif // OP_BASE + include "mlir/IR/EnumAttr.td" // Op is a DMA-like operation with BD contraints @@ -179,6 +179,4 @@ def AIEDevice: I32EnumAttr<"AIEDevice", "AIE Device", [xcvc1902, xcve2302, xcve2802]> { let cppNamespace = "xilinx::AIE"; -} - -#endif // AIE_INTERFACES \ No newline at end of file +} \ No newline at end of file diff --git a/include/aie/Dialect/AIE/IR/AIETargetModel.h b/include/aie/Dialect/AIE/IR/AIETargetModel.h index c0cd7a8d87..d2f8880be1 100644 --- a/include/aie/Dialect/AIE/IR/AIETargetModel.h +++ b/include/aie/Dialect/AIE/IR/AIETargetModel.h @@ -12,6 +12,7 @@ #define MLIR_AIE_DEVICEMODEL_H #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "aie/Dialect/AIE/IR/AIEEnums.h" @@ -65,16 +66,16 @@ class AIETargetModel { /// Return the tile ID of the memory to the west of the given tile, if it /// exists. - virtual std::optional getMemWest(TileID src) const = 0; + virtual llvm::Optional getMemWest(TileID src) const = 0; /// Return the tile ID of the memory to the east of the given tile, if it /// exists. - virtual std::optional getMemEast(TileID src) const = 0; + virtual llvm::Optional getMemEast(TileID src) const = 0; /// Return the tile ID of the memory to the north of the given tile, if it /// exists. - virtual std::optional getMemNorth(TileID src) const = 0; + virtual llvm::Optional getMemNorth(TileID src) const = 0; /// Return the tile ID of the memory to the south of the given tile, if it /// exists. - virtual std::optional getMemSouth(TileID src) const = 0; + virtual llvm::Optional getMemSouth(TileID src) const = 0; /// Return true if src is the internal memory of dst bool isInternal(int srcCol, int srcRow, int dstCol, int dstRow) const { @@ -165,10 +166,10 @@ class AIE1TargetModel : public AIETargetModel { AIEArch getTargetArch() const override; - std::optional getMemWest(TileID src) const override; - std::optional getMemEast(TileID src) const override; - std::optional getMemNorth(TileID src) const override; - std::optional getMemSouth(TileID src) const override; + llvm::Optional getMemWest(TileID src) const override; + llvm::Optional getMemEast(TileID src) const override; + llvm::Optional getMemNorth(TileID src) const override; + llvm::Optional getMemSouth(TileID src) const override; bool isMemWest(int srcCol, int srcRow, int dstCol, int dstRow) const override; bool isMemEast(int srcCol, int srcRow, int dstCol, int dstRow) const override; @@ -215,10 +216,10 @@ class AIE2TargetModel : public AIETargetModel { AIEArch getTargetArch() const override; - std::optional getMemWest(TileID src) const override; - std::optional getMemEast(TileID src) const override; - std::optional getMemNorth(TileID src) const override; - std::optional getMemSouth(TileID src) const override; + llvm::Optional getMemWest(TileID src) const override; + llvm::Optional getMemEast(TileID src) const override; + llvm::Optional getMemNorth(TileID src) const override; + llvm::Optional getMemSouth(TileID src) const override; bool isMemWest(int srcCol, int srcRow, int dstCol, int dstRow) const override; bool isMemEast(int srcCol, int srcRow, int dstCol, int dstRow) const override; diff --git a/include/aie/Dialect/AIE/Transforms/AIEFindFlows.td b/include/aie/Dialect/AIE/Transforms/AIEFindFlows.td index e766e7cf1f..6f475cdf87 100644 --- a/include/aie/Dialect/AIE/Transforms/AIEFindFlows.td +++ b/include/aie/Dialect/AIE/Transforms/AIEFindFlows.td @@ -11,9 +11,9 @@ #ifndef AIE_FIND_FLOWS #define AIE_FIND_FLOWS -include "mlir/IR/PatternBase.td" -include "aie/Dialect/AIE/IR/AIE.td" +include "AIE.td" -def : Pat<(AIE_WireOp (AIE_CoreOp:$core $x, $y), $port, (AIE_SwitchboxOp:$switch $x, $y), $b), (AIE_FlowOp $core, $port)>; +def : Pat<(aie_WireOp (aie_CoreOp:$core $x, $y), $port, (aie_SwitchboxOp:$switch $x, $y), $b), + (aie_FlowOp $core, $port)>; #endif // AIE_FIND_FLOWS diff --git a/include/aie/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.td b/include/aie/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.td index 49e3eeacf4..602e4dd0f6 100644 --- a/include/aie/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.td +++ b/include/aie/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.td @@ -19,11 +19,9 @@ def toDefaultAddressSpace : NativeCodeCall<"TypeAttr::get(memRefToDefaultAddress def hasNonDefaultAddressSpace : Constraint< CPred<"$0.getValue().cast().getMemorySpace() != 0">, "has non-default address space">; -def : Pat< - /*pattern*/ (MemRef_GlobalOp $sym_name, $sym_visibility, $type, $initial_value, $constant, $attrs), - /*result*/ (MemRef_GlobalOp $sym_name, $sym_visibility, (toDefaultAddressSpace $type), $initial_value, $constant, $attrs), - /*preds*/ [(hasNonDefaultAddressSpace $type)], - /*supplemental_results*/ [], - /*benefitAdded*/ (addBenefit 20)>; +def : Pat<(MemRef_GlobalOp $sym_name, $sym_visibility, $type, $initial_value, $constant, $attrs), + (MemRef_GlobalOp $sym_name, $sym_visibility, (toDefaultAddressSpace $type), $initial_value, $constant, $attrs), + [(hasNonDefaultAddressSpace $type)], + (addBenefit 20)>; #endif // AIE_NORMALIZE_ADDRESS_SPACES diff --git a/include/aie/Dialect/AIEVec/AIEVecUtils.h b/include/aie/Dialect/AIEVec/AIEVecUtils.h index 9df6e9535e..5fd24b774b 100644 --- a/include/aie/Dialect/AIEVec/AIEVecUtils.h +++ b/include/aie/Dialect/AIEVec/AIEVecUtils.h @@ -43,7 +43,7 @@ inline VectorType createVectorType(unsigned lanes, Type elementType) { // Return the size (in bits) of the underlying element type of the vector inline int32_t getElementSizeInBits(VectorType type) { - return type.cast().getElementTypeBitWidth(); + return type.cast().getSizeInBits() / type.getNumElements(); } // Return the number of lanes along the vectorized dimension for the vector @@ -142,15 +142,16 @@ inline AffineExpr flattenedStridedExpr(ArrayRef sizes, // Construct a linearized affine expression for the upd op. inline AffineExpr constructLinearizedAffineExprForUPDOp(aievec::UPDOp updOp) { + SmallVector indices(updOp.getIndices().begin(), + updOp.getIndices().end()); MemRefType memRefType = updOp.getSource().getType().cast(); MLIRContext *context = memRefType.getContext(); SmallVector exprVec; - llvm::SmallDenseMap indexToExprDimMap; - for (auto idxAndValue : llvm::enumerate(updOp.getIndices())) { + DenseMap indexToExprDimMap; + for (auto idxAndValue : llvm::enumerate(indices)) { auto value = idxAndValue.value(); - if (affine::AffineApplyOp apOf = - value.getDefiningOp()) { + if (AffineApplyOp apOf = value.getDefiningOp()) { AffineMap map = apOf.getAffineMap(); // Cannot create linearized affineExpr for complicated index. if (map.getNumResults() != 1) { diff --git a/include/aie/Dialect/AIEVec/IR/AIEVecDialect.td b/include/aie/Dialect/AIEVec/IR/AIEVecDialect.td index 4741fdba97..d4868d47ab 100644 --- a/include/aie/Dialect/AIEVec/IR/AIEVecDialect.td +++ b/include/aie/Dialect/AIEVec/IR/AIEVecDialect.td @@ -22,6 +22,7 @@ def AIEVec_Dialect : Dialect { let extraClassDeclaration = [{ void registerTypes(); }]; + let useFoldAPI = kEmitFoldAdaptorFolder; } #endif // AIEVEC_DIALECT diff --git a/include/aie/Dialect/AIEVec/IR/AIEVecOps.h b/include/aie/Dialect/AIEVec/IR/AIEVecOps.h index e686e43701..15b240a32c 100644 --- a/include/aie/Dialect/AIEVec/IR/AIEVecOps.h +++ b/include/aie/Dialect/AIEVec/IR/AIEVecOps.h @@ -13,10 +13,8 @@ #ifndef AIE_DIALECT_AIEVEC_IR_AIEVECOPS_H #define AIE_DIALECT_AIEVEC_IR_AIEVECOPS_H -#include "mlir/Bytecode/BytecodeOpInterface.h" -#include "mlir/Interfaces/SideEffectInterfaces.h" - #include "AIEVecDialect.h" +#include "mlir/Interfaces/SideEffectInterfaces.h" #define GET_OP_CLASSES #include "aie/Dialect/AIEVec/IR/AIEVecOps.h.inc" diff --git a/include/aie/Dialect/AIEVec/Transforms/Passes.h b/include/aie/Dialect/AIEVec/Transforms/Passes.h index 5d8e65f247..68299fb5ca 100644 --- a/include/aie/Dialect/AIEVec/Transforms/Passes.h +++ b/include/aie/Dialect/AIEVec/Transforms/Passes.h @@ -19,9 +19,7 @@ namespace mlir { -namespace affine { class AffineDialect; -} namespace func { class FuncDialect; @@ -58,13 +56,11 @@ class FuncOp; namespace xilinx { namespace aievec { -using mlir::affine::AffineDialect; - #define GEN_PASS_DECL #define GEN_PASS_CLASSES #include "aie/Dialect/AIEVec/Transforms/Passes.h.inc" -std::unique_ptr createAIEVectorizePass(); +std::unique_ptr createAIEVectorizePass(); /// Generate the code for registering passes. #define GEN_PASS_REGISTRATION diff --git a/include/aie/Dialect/AIEX/AIETokenAnalysis.h b/include/aie/Dialect/AIEX/AIETokenAnalysis.h index 40234aebaa..6d7c1290f4 100644 --- a/include/aie/Dialect/AIEX/AIETokenAnalysis.h +++ b/include/aie/Dialect/AIEX/AIETokenAnalysis.h @@ -16,11 +16,11 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" +#include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/TypeSupport.h" #include "mlir/IR/Types.h" -#include "mlir/Interfaces/FunctionImplementation.h" #include "llvm/ADT/StringSwitch.h" #include diff --git a/include/aie/Dialect/AIEX/IR/AIEX.td b/include/aie/Dialect/AIEX/IR/AIEX.td index 0e2c2c20ce..393d382c09 100644 --- a/include/aie/Dialect/AIEX/IR/AIEX.td +++ b/include/aie/Dialect/AIEX/IR/AIEX.td @@ -36,6 +36,7 @@ to the more mature AIE dialect. }]; let useDefaultTypePrinterParser = 1; + let useFoldAPI = kEmitFoldAdaptorFolder; } diff --git a/lib/Conversion/PassDetail.h b/lib/Conversion/PassDetail.h index c8b7eb3da4..41e721c0c1 100644 --- a/lib/Conversion/PassDetail.h +++ b/lib/Conversion/PassDetail.h @@ -12,7 +12,7 @@ #define AIE_CONVERSION_PASSDETAIL_H_ #include "mlir/IR/BuiltinOps.h" -#include "mlir/Interfaces/FunctionInterfaces.h" +#include "mlir/IR/FunctionInterfaces.h" #include "mlir/Pass/Pass.h" namespace xilinx { diff --git a/lib/Dialect/AIE/IR/AIEDialect.cpp b/lib/Dialect/AIE/IR/AIEDialect.cpp index f15c678720..82f558c0d0 100644 --- a/lib/Dialect/AIE/IR/AIEDialect.cpp +++ b/lib/Dialect/AIE/IR/AIEDialect.cpp @@ -1057,6 +1057,9 @@ int xilinx::AIE::MemOp::rowIndex() { return getTileOp().rowIndex(); } /// function. Region *xilinx::AIE::MemOp::getCallableRegion() { return &(getBody()); } +/// Returns the results types that the callable region produces when executed. +ArrayRef xilinx::AIE::MemOp::getCallableResults() { return getType(); } + // MemTileDMAOp LogicalResult xilinx::AIE::MemTileDMAOp::verify() { assert(getOperation()->getNumRegions() == 1 && @@ -1217,6 +1220,11 @@ int xilinx::AIE::MemTileDMAOp::rowIndex() { return getTileOp().rowIndex(); } /// function. Region *xilinx::AIE::MemTileDMAOp::getCallableRegion() { return &(getBody()); } +/// Returns the results types that the callable region produces when executed. +ArrayRef xilinx::AIE::MemTileDMAOp::getCallableResults() { + return getType(); +} + // SwitchboxOp xilinx::AIE::TileOp xilinx::AIE::SwitchboxOp::getTileOp() { return cast(getTile().getDefiningOp()); diff --git a/lib/Dialect/AIE/IR/AIETargetModel.cpp b/lib/Dialect/AIE/IR/AIETargetModel.cpp index e974514384..a027461cad 100644 --- a/lib/Dialect/AIE/IR/AIETargetModel.cpp +++ b/lib/Dialect/AIE/IR/AIETargetModel.cpp @@ -26,9 +26,9 @@ AIETargetModel::~AIETargetModel() {} AIEArch AIE1TargetModel::getTargetArch() const { return AIEArch::AIE1; } // Return the tile ID of the memory to the west of the given tile, if it exists. -std::optional AIE1TargetModel::getMemWest(TileID src) const { +Optional AIE1TargetModel::getMemWest(TileID src) const { bool isEvenRow = ((src.second % 2) == 0); - std::optional ret; + Optional ret; if (isEvenRow) ret = src; else @@ -38,9 +38,9 @@ std::optional AIE1TargetModel::getMemWest(TileID src) const { return ret; } // Return the tile ID of the memory to the west of the given tile, if it exists. -std::optional AIE1TargetModel::getMemEast(TileID src) const { +Optional AIE1TargetModel::getMemEast(TileID src) const { bool isEvenRow = ((src.second % 2) == 0); - std::optional ret; + Optional ret; if (isEvenRow) ret = std::make_pair(src.first + 1, src.second); else @@ -50,14 +50,14 @@ std::optional AIE1TargetModel::getMemEast(TileID src) const { return ret; } // Return the tile ID of the memory to the west of the given tile, if it exists. -std::optional AIE1TargetModel::getMemNorth(TileID src) const { - std::optional ret = std::make_pair(src.first, src.second + 1); +Optional AIE1TargetModel::getMemNorth(TileID src) const { + Optional ret = std::make_pair(src.first, src.second + 1); if (!isValidTile(*ret)) ret.reset(); return ret; } -std::optional AIE1TargetModel::getMemSouth(TileID src) const { - std::optional ret = std::make_pair(src.first, src.second - 1); +Optional AIE1TargetModel::getMemSouth(TileID src) const { + Optional ret = std::make_pair(src.first, src.second - 1); // The first row doesn't have a tile memory south if (!isValidTile(*ret) || ret->second == 0) ret.reset(); @@ -259,28 +259,28 @@ AIE1TargetModel::getNumSourceShimMuxConnections(int col, int row, AIEArch AIE2TargetModel::getTargetArch() const { return AIEArch::AIE2; } // Return the tile ID of the memory to the west of the given tile, if it exists. -std::optional AIE2TargetModel::getMemWest(TileID src) const { - std::optional ret = std::make_pair(src.first - 1, src.second); +Optional AIE2TargetModel::getMemWest(TileID src) const { + Optional ret = std::make_pair(src.first - 1, src.second); if (!isValidTile(*ret)) ret.reset(); return ret; } // Return the tile ID of the memory to the west of the given tile, if it exists. -std::optional AIE2TargetModel::getMemEast(TileID src) const { - std::optional ret = src; +Optional AIE2TargetModel::getMemEast(TileID src) const { + Optional ret = src; if (!isValidTile(*ret)) ret.reset(); return ret; } // Return the tile ID of the memory to the west of the given tile, if it exists. -std::optional AIE2TargetModel::getMemNorth(TileID src) const { - std::optional ret = std::make_pair(src.first, src.second + 1); +Optional AIE2TargetModel::getMemNorth(TileID src) const { + Optional ret = std::make_pair(src.first, src.second + 1); if (!isValidTile(*ret)) ret.reset(); return ret; } -std::optional AIE2TargetModel::getMemSouth(TileID src) const { - std::optional ret = std::make_pair(src.first, src.second - 1); +Optional AIE2TargetModel::getMemSouth(TileID src) const { + Optional ret = std::make_pair(src.first, src.second - 1); // The first row doesn't have a tile memory south // Memtiles don't have memory adjacency to neighboring core tiles. if (!isValidTile(*ret) || ret->second == 0 || diff --git a/lib/Dialect/AIE/Transforms/AIEFindFlows.cpp b/lib/Dialect/AIE/Transforms/AIEFindFlows.cpp index d0e3d64567..197e4a37c4 100644 --- a/lib/Dialect/AIE/Transforms/AIEFindFlows.cpp +++ b/lib/Dialect/AIE/Transforms/AIEFindFlows.cpp @@ -33,7 +33,7 @@ class ConnectivityAnalysis { ConnectivityAnalysis(DeviceOp &d) : device(d) {} private: - std::optional + llvm::Optional getConnectionThroughWire(Operation *op, Port masterPort) const { LLVM_DEBUG(llvm::dbgs() << "Wire:" << *op << " " << stringifyWireBundle(masterPort.first) diff --git a/lib/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.cpp b/lib/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.cpp index 8b332c59f3..90c1f67f4f 100644 --- a/lib/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.cpp +++ b/lib/Dialect/AIE/Transforms/AIENormalizeAddressSpaces.cpp @@ -41,7 +41,7 @@ struct AIENormalizeAddressSpacesPass DeviceOp device = getOperation(); TypeConverter converter; - converter.addConversion([&](Type type) -> std::optional { + converter.addConversion([&](Type type) -> Optional { return memRefToDefaultAddressSpace(type); }); diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoRegisterProcess.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoRegisterProcess.cpp index 247a077d80..a8f980f8ec 100755 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoRegisterProcess.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoRegisterProcess.cpp @@ -59,11 +59,14 @@ struct AIEObjectFifoRegisterProcessPass mlir::scf::ForOp createForLoop(OpBuilder &builder, int length) { arith::ConstantOp lowerBound = builder.create( - builder.getUnknownLoc(), builder.getIndexAttr(0)); + builder.getUnknownLoc(), builder.getIndexAttr(0), + builder.getIndexType()); arith::ConstantOp upperBound = builder.create( - builder.getUnknownLoc(), builder.getIndexAttr(length)); + builder.getUnknownLoc(), builder.getIndexAttr(length), + builder.getIndexType()); arith::ConstantOp step = builder.create( - builder.getUnknownLoc(), builder.getIndexAttr(1)); + builder.getUnknownLoc(), builder.getIndexAttr(1), + builder.getIndexType()); mlir::scf::ForOp forLoop = builder.create( builder.getUnknownLoc(), lowerBound, upperBound, step); return forLoop; diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index cafbc83763..4b40ebafd4 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -386,9 +386,9 @@ struct AIEObjectFifoStatefulTransformPass /// Function that returns a pointer to the block of a Region /// that contains the AIEEndOp. - Block *findEndOpBlock(Region &r) { + Block *findEndOpBlock(Region *r) { Block *endBlock = nullptr; - for (auto &bl : r.getBlocks()) + for (auto &bl : r->getBlocks()) if (!bl.getOps().empty()) endBlock = &bl; return endBlock; @@ -476,10 +476,10 @@ struct AIEObjectFifoStatefulTransformPass target = objFifoLinks[*linkOp]; // search for MemOp - Operation *producerMem = nullptr; + MemOp *producerMem = nullptr; for (auto memOp : device.getOps()) { if (memOp.getTile() == op.getProducerTile()) { - producerMem = memOp.getOperation(); + producerMem = &memOp; break; } } @@ -487,20 +487,19 @@ struct AIEObjectFifoStatefulTransformPass // if none exists, create one TileOp objFifoTileOp = target.getProducerTileOp(); if (producerMem == nullptr) { - if (device->getNumRegions() != 1) - assert(false && "expected num regions for device op"); - OpBuilder::InsertionGuard g(builder); builder.setInsertionPointToEnd(device.getBody()); MemOp newMemOp = builder.create(builder.getUnknownLoc(), objFifoTileOp); - { - OpBuilder::InsertionGuard g(builder); - builder.setInsertionPointToStart(&newMemOp.getRegion().emplaceBlock()); - builder.create(builder.getUnknownLoc()); - } - producerMem = newMemOp.getOperation(); + producerMem = &newMemOp; + Region &r = producerMem->getBody(); + r.push_back(new Block); + // add terminator operation to end block + Block &endBlock = r.back(); + builder.setInsertionPointToStart(&endBlock); + builder.create(builder.getUnknownLoc()); } - Block *endBlock = findEndOpBlock(producerMem->getRegion(0)); + + Block *endBlock = findEndOpBlock(&(producerMem->getBody())); Block *lastDmaBlock = endBlock->getSinglePredecessor(); Block *dmaBlock = builder.createBlock(endBlock); Block *bdBlock = builder.createBlock(endBlock); @@ -545,10 +544,10 @@ struct AIEObjectFifoStatefulTransformPass int offset = 0; // search for ShimDMAOp - Operation *producerDMA = nullptr; + ShimDMAOp *producerDMA = nullptr; for (auto dmaOp : device.getOps()) { if (dmaOp.getTile() == op.getProducerTile()) { - producerDMA = dmaOp.getOperation(); + producerDMA = &dmaOp; break; } } @@ -556,21 +555,19 @@ struct AIEObjectFifoStatefulTransformPass // if none exists, create one TileOp objFifoTileOp = op.getProducerTileOp(); if (producerDMA == nullptr) { - if (device->getNumRegions() != 1) - assert(false && "expected num regions for device op"); - OpBuilder::InsertionGuard g(builder); builder.setInsertionPointToEnd(device.getBody()); ShimDMAOp newDMAOp = builder.create( builder.getUnknownLoc(), builder.getIndexType(), objFifoTileOp); - { - OpBuilder::InsertionGuard g(builder); - builder.setInsertionPointToStart(&newDMAOp.getRegion().emplaceBlock()); - builder.create(builder.getUnknownLoc()); - } - producerDMA = newDMAOp.getOperation(); + producerDMA = &newDMAOp; + Region &r = producerDMA->getBody(); + r.push_back(new Block); + // add terminator operation to end block + Block &endBlock = r.back(); + builder.setInsertionPointToStart(&endBlock); + builder.create(builder.getUnknownLoc()); } - Block *endBlock = findEndOpBlock(producerDMA->getRegion(0)); + Block *endBlock = findEndOpBlock(&(producerDMA->getBody())); Block *lastDmaBlock = endBlock->getSinglePredecessor(); Block *dmaBlock = builder.createBlock(endBlock); Block *bdBlock = builder.createBlock(endBlock); @@ -685,10 +682,10 @@ struct AIEObjectFifoStatefulTransformPass } // search for MemTileDMAOp - Operation *producerDMA = nullptr; + MemTileDMAOp *producerDMA = nullptr; for (auto dmaOp : device.getOps()) { if (dmaOp.getTile() == target.getProducerTile()) { - producerDMA = dmaOp.getOperation(); + producerDMA = &dmaOp; break; } } @@ -696,21 +693,19 @@ struct AIEObjectFifoStatefulTransformPass // if none exists, create one TileOp objFifoTileOp = target.getProducerTileOp(); if (producerDMA == nullptr) { - if (device->getNumRegions() != 1) - assert(false && "expected num regions for device op"); - OpBuilder::InsertionGuard g(builder); builder.setInsertionPointToEnd(device.getBody()); MemTileDMAOp newDMAOp = builder.create(builder.getUnknownLoc(), objFifoTileOp); - { - OpBuilder::InsertionGuard g(builder); - builder.setInsertionPointToStart(&newDMAOp.getRegion().emplaceBlock()); - builder.create(builder.getUnknownLoc()); - } - producerDMA = newDMAOp.getOperation(); + producerDMA = &newDMAOp; + Region &r = producerDMA->getBody(); + r.push_back(new Block); + // add terminator operation to end block + Block &endBlock = r.back(); + builder.setInsertionPointToStart(&endBlock); + builder.create(builder.getUnknownLoc()); } - Block *endBlock = findEndOpBlock(producerDMA->getRegion(0)); + Block *endBlock = findEndOpBlock(&(producerDMA->getBody())); Block *lastDmaBlock = endBlock->getSinglePredecessor(); Block *dmaBlock = builder.createBlock(endBlock); Block *bdBlock = builder.createBlock(endBlock); @@ -825,7 +820,8 @@ struct AIEObjectFifoStatefulTransformPass increment_value = currentDuplication * step; arith::ConstantOp increment = builder.create( - builder.getUnknownLoc(), builder.getIndexAttr(increment_value)); + builder.getUnknownLoc(), builder.getIndexAttr(increment_value), + builder.getIndexType()); arith::AddIOp sum = builder.create( builder.getUnknownLoc(), builder.getIndexType(), base, increment->getResult(0)); @@ -959,12 +955,13 @@ struct AIEObjectFifoStatefulTransformPass new_step_value; arith::ConstantOp uBound = builder.create( builder.getUnknownLoc(), - builder.getIndexAttr(new_upper_bound)); + builder.getIndexAttr(new_upper_bound), + old_upper_bound.getType()); forLoop.setUpperBound(uBound); } arith::ConstantOp new_step = builder.create( - builder.getUnknownLoc(), - builder.getIndexAttr(new_step_value)); + builder.getUnknownLoc(), builder.getIndexAttr(new_step_value), + old_upper_bound.getType()); forLoop.setStep(new_step); // duplicate loop body, insert before terminator operation diff --git a/lib/Dialect/AIE/Transforms/AIEVectorOpt.cpp b/lib/Dialect/AIE/Transforms/AIEVectorOpt.cpp index c113b82cf1..a5f70cffb0 100644 --- a/lib/Dialect/AIE/Transforms/AIEVectorOpt.cpp +++ b/lib/Dialect/AIE/Transforms/AIEVectorOpt.cpp @@ -10,7 +10,6 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" -#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/IRMapping.h" @@ -35,8 +34,7 @@ struct AIEVectorOptPass : public AIEVectorOptBase { func::FuncOp f = getOperation(); // Initial store->load forwarding - IRRewriter rewriter(&getContext()); - vector::transferOpflowOpt(rewriter, f); + vector::transferOpflowOpt(f); ConversionTarget target(getContext()); target.addLegalDialect(); diff --git a/lib/Dialect/AIE/Utils/AIENetlistAnalysis.cpp b/lib/Dialect/AIE/Utils/AIENetlistAnalysis.cpp index d77618cdac..b1d583ab9c 100644 --- a/lib/Dialect/AIE/Utils/AIENetlistAnalysis.cpp +++ b/lib/Dialect/AIE/Utils/AIENetlistAnalysis.cpp @@ -249,7 +249,7 @@ xilinx::AIE::NetlistAnalysis::getMemUsageInBytes(Operation *tileOp) const { uint64_t memUsage = 0; for (auto buf : buffers[tileOp]) { auto t = buf.getType().cast(); - memUsage += t.getElementTypeBitWidth(); + memUsage += t.getSizeInBits(); } return memUsage / 8; } diff --git a/lib/Dialect/AIEVec/Transforms/AIEVectorize.cpp b/lib/Dialect/AIEVec/Transforms/AIEVectorize.cpp index 592d0b2d5d..79b41c5e58 100644 --- a/lib/Dialect/AIEVec/Transforms/AIEVectorize.cpp +++ b/lib/Dialect/AIEVec/Transforms/AIEVectorize.cpp @@ -444,8 +444,7 @@ static AffineExpr constructLinearizedAffineExpr(TransferReadOp readOp, // If the access is a map via affine apply op (e.g., A[i+2], where the map // is d0 -> d0+2), push in the map after replacing all the dims with unique // index identifiers (e.g., let the unique identifier for index i be k0). - if (affine::AffineApplyOp apOf = - value.getDefiningOp()) { + if (AffineApplyOp apOf = value.getDefiningOp()) { AffineMap map = apOf.getAffineMap(); assert(map.getNumResults() == 1 && "Failed to create linearized affineExpr for complicated index"); @@ -1079,8 +1078,7 @@ generateUPDOp(TransferReadOp readOp, // If the transfer_read has some apply operations, then they also need to // be hoisted. for (auto &value : indices) { - if (affine::AffineApplyOp apOf = - value.getDefiningOp()) { + if (AffineApplyOp apOf = value.getDefiningOp()) { // Skip hoisting if already above in lexicographical order if (apOf->getBlock() == readOp->getBlock() && apOf->isBeforeInBlock(updOp)) @@ -1134,13 +1132,13 @@ static int32_t computeVecorizedLoopStepSize(Operation *op, VectState *state) { // index. [[maybe_unused]] bool found = false; for (auto loop : enclosingLoops) { - auto iv = cast(loop).getInductionVar(); - auto invariants = mlir::affine::getInvariantAccesses(iv, indices); + auto iv = cast(loop).getInductionVar(); + auto invariants = getInvariantAccesses(iv, indices); if (!invariants.count(index)) { assert( !found && "stepsize computation already has an entry along the variant dim"); - step = cast(loop).getStep(); + step = cast(loop).getStep(); found = true; } } @@ -2440,10 +2438,9 @@ static void generateAIEAddOrSubOpsInFunc(func::FuncOp func, VectState *state) { // generate the UPD ops, we first visit the innermost for op, and for each // transfer_read instruction nested inside that op, create a set of UPD ops, // and then insert them in the front bb of that for op's region. -static void insertUPDOpsInLoop(affine::AffineForOp forOp, VectState *state) { +static void insertUPDOpsInLoop(AffineForOp forOp, VectState *state) { // Recursively generate UPD ops in the nested for op's. - for (affine::AffineForOp nestedOp : - forOp.getRegion().getOps()) + for (AffineForOp nestedOp : forOp.getRegion().getOps()) insertUPDOpsInLoop(nestedOp, state); // A map from an interval to the UPD op. The key gives the interval that @@ -2474,7 +2471,7 @@ static void insertUPDOpsInLoop(affine::AffineForOp forOp, VectState *state) { // Replace all the transfer_read ops with UPD ops in the function. static void insertUPDOpsInFunc(func::FuncOp func, VectState *state) { - for (affine::AffineForOp forOp : func.getOps()) { + for (AffineForOp forOp : func.getOps()) { insertUPDOpsInLoop(forOp, state); } } @@ -2610,8 +2607,7 @@ static void redundantLoadStoreOptimization(ModuleOp module) { }); // Now that all the transfer ops are marked inbound, remove redundant // vector loads/stores - IRRewriter rewriter(module.getContext()); - vector::transferOpflowOpt(rewriter, func); + transferOpflowOpt(func); } } @@ -2641,11 +2637,10 @@ static void postCanonicalizeIR(ModuleOp module) { // Iterate over the loop nestings to form loop nesting bands. Then for each // block within those bands, the enclosingLoops is set to the loop band. static void -computeEnclosingLoopsPerBlock(affine::AffineForOp forOp, VectState *state, +computeEnclosingLoopsPerBlock(AffineForOp forOp, VectState *state, SmallVector &enclosingLoops) { // Form the loop band for nested for ops - for (affine::AffineForOp nestedOp : - forOp.getRegion().getOps()) { + for (AffineForOp nestedOp : forOp.getRegion().getOps()) { enclosingLoops.push_back(nestedOp); computeEnclosingLoopsPerBlock(nestedOp, state, enclosingLoops); enclosingLoops.pop_back(); @@ -2879,9 +2874,9 @@ static LogicalResult isUnalignedLoad(TransferReadOp readOp, VectState *state) { // Iterate over all enclosing loops, and find the one that is variant in // index. for (auto loop : enclosingLoops) { - auto affineForOp = cast(loop); + AffineForOp affineForOp = cast(loop); auto iv = affineForOp.getInductionVar(); - auto invariants = mlir::affine::getInvariantAccesses(iv, indices); + auto invariants = getInvariantAccesses(iv, indices); if (!invariants.count(index)) { step = affineForOp.getStep(); @@ -2895,7 +2890,7 @@ static LogicalResult isUnalignedLoad(TransferReadOp readOp, VectState *state) { // upper bound's affine_map offset and loop step, we need to check // whether affine map's offset of loop upper bound is divisible by // the vector lanes. - affine::AffineBound ub = affineForOp.getUpperBound(); + AffineBound ub = affineForOp.getUpperBound(); AffineMap origUbMap = ub.getMap(); if (!origUbMap.isEmpty() && !origUbMap.isConstant()) { AffineExpr origUbMapResult = origUbMap.getResult(0); @@ -3019,7 +3014,7 @@ void AIEVectorize::runOnOperation() { // First compute the loops surrounding each load/store operation. This is // necessary to identify loads/stores that are nested together. - for (auto forOp : func.getOps()) { + for (AffineForOp forOp : func.getOps()) { SmallVector enclosingLoops; enclosingLoops.push_back(forOp); computeEnclosingLoopsPerBlock(forOp, state, enclosingLoops); diff --git a/lib/Dialect/AIEVec/Transforms/ConvertVectorToAIEVec.cpp b/lib/Dialect/AIEVec/Transforms/ConvertVectorToAIEVec.cpp index 72025f01da..ba83c28822 100644 --- a/lib/Dialect/AIEVec/Transforms/ConvertVectorToAIEVec.cpp +++ b/lib/Dialect/AIEVec/Transforms/ConvertVectorToAIEVec.cpp @@ -82,19 +82,18 @@ using SetInboundsToWriteOp = SetInboundsToReadStoreOpPattern; //===----------------------------------------------------------------------===// struct RedundantLoadStoreOptimizationPass - : public PassWrapper> { - + : public PassWrapper> { void runOnOperation() override { - auto op = getOperation(); + func::FuncOp funcOp = getOperation(); MLIRContext *context = &getContext(); RewritePatternSet patterns(context); patterns.add( patterns.getContext()); - (void)applyPatternsAndFoldGreedily(op, std::move(patterns)); - IRRewriter rewriter(&getContext()); - vector::transferOpflowOpt(rewriter, op); + (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); + transferOpflowOpt(funcOp); } }; diff --git a/lib/Dialect/AIEVec/Transforms/FoldMulAddChainToConvOp.cpp b/lib/Dialect/AIEVec/Transforms/FoldMulAddChainToConvOp.cpp index ab8f930ceb..f129a8a0b1 100644 --- a/lib/Dialect/AIEVec/Transforms/FoldMulAddChainToConvOp.cpp +++ b/lib/Dialect/AIEVec/Transforms/FoldMulAddChainToConvOp.cpp @@ -241,10 +241,8 @@ struct LongestConvMACChainAnalysis { return isa(op) || isa(op) || isa(op); }; - BackwardSliceOptions backwardSliceOptions; - backwardSliceOptions.filter = opFilter; - getBackwardSlice(mulOpOperand, &opBwdSlices, backwardSliceOptions); + getBackwardSlice(mulOpOperand, &opBwdSlices, opFilter); opBwdSlices.insert(mulOpOperand); LLVM_DEBUG(llvm::dbgs() << "opBwdSlices = [\n"); diff --git a/lib/Dialect/AIEVec/Transforms/VectorToAIEVecConversions.cpp b/lib/Dialect/AIEVec/Transforms/VectorToAIEVecConversions.cpp index f581a25d38..cef320d992 100644 --- a/lib/Dialect/AIEVec/Transforms/VectorToAIEVecConversions.cpp +++ b/lib/Dialect/AIEVec/Transforms/VectorToAIEVecConversions.cpp @@ -429,7 +429,7 @@ struct UPDOpEffectiveAccessSizeAnalysis { UPDOpEffectiveAccessSizeAnalysis(aievec::UPDOp updOp) { auto vecType = cast(updOp.getResult().getType()); unsigned sizeInBits = - cast(vecType).getElementTypeBitWidth() - updOp.getOffset(); + cast(vecType).getSizeInBits() - updOp.getOffset(); for (Operation *user : updOp->getUsers()) { auto userUpdOp = dyn_cast(user); if (userUpdOp) @@ -463,7 +463,7 @@ struct FoldVectorExtractAndBroadcastToAIEBroadcast auto src = extOp.getVector(); auto pos = extOp.getPosition(); - int64_t posVal = pos[0]; + int64_t posVal = cast(pos[0]).getInt(); VectorType srcVecType = cast(src.getType()); VectorType resultType = cast(bcastOp.getResult().getType()); if (srcVecType != resultType) { @@ -837,7 +837,7 @@ struct FoldBroadcastToFMAOp : public OpConversionPattern { .getResult(); // XXX: We assume a 1D vector auto pos = extOp.getPosition(); - int64_t zstart = pos[0]; + int64_t zstart = cast(pos[0]).getInt(); auto fmaOpAttr = buildFMAOpSplatAttrForElemTy(fmaOp, zstart); rewriter.replaceOpWithNewOp( fmaOp, TypeRange({fmaOp.getResult().getType()}), @@ -1656,7 +1656,7 @@ struct LowerVectorExtractStridedSliceOpAIEv1Pattern LogicalResult matchAndRewrite(vector::ExtractStridedSliceOp extractOp, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { - auto vType = extractOp.getSourceVectorType(); + auto vType = extractOp.getVectorType(); if (vType.getRank() != 1) return failure(); diff --git a/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp b/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp index 50a84d912d..b5b404fdd7 100644 --- a/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp +++ b/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp @@ -90,11 +90,11 @@ struct SplitUnalignedTransferReadPattern Value oldInnerMostIdx = adaptor.getIndices().back(); auto offsetCorrectionMap = AffineMap::get(1, 0, getAffineDimExpr(0, readOp.getContext()) - offset); - Value newInnerMostIdx = rewriter - .create( - readOp.getLoc(), offsetCorrectionMap, - SmallVector({oldInnerMostIdx})) - .getResult(); + Value newInnerMostIdx = + rewriter + .create(readOp.getLoc(), offsetCorrectionMap, + SmallVector({oldInnerMostIdx})) + .getResult(); SmallVector alignedIdx; alignedIdx.append(adaptor.getIndices().begin(), adaptor.getIndices().end()); alignedIdx[alignedIdx.size() - 1] = newInnerMostIdx; @@ -154,7 +154,7 @@ struct ConvertSplatTransferReadToBroadcastPattern // If the innermost index comes from an `affine.apply` op, take the base // as the new innermost index for the new `vector.transfer_read`, and the // offset as the index for the `aievec.broadcast` op. - if (auto applyOp = newIdx.getDefiningOp()) + if (auto applyOp = newIdx.getDefiningOp()) if (applyOp.getAffineMap().getNumDims() == 1) { newIdx = applyOp.getMapOperands()[0]; offset = applyOp.getAffineMap().compose(ArrayRef{0})[0]; @@ -169,11 +169,10 @@ struct ConvertSplatTransferReadToBroadcastPattern offset = offset % vlen; auto newAddrMap = AffineMap::get( 1, 0, getAffineDimExpr(0, readOp.getContext()) + numElemsToSkip); - newIdx = - rewriter - .create(readOp.getLoc(), newAddrMap, - SmallVector({newIdx})) - .getResult(); + newIdx = rewriter + .create(readOp.getLoc(), newAddrMap, + SmallVector({newIdx})) + .getResult(); } indices[indices.size() - 1] = newIdx; auto newReadOp = rewriter.create( @@ -202,8 +201,9 @@ struct HoistCastOpToDataSourcePattern : public RewritePattern { arith::ExtSIOp extOp = cast(op); Operation *defOp = extOp.getIn().getDefiningOp(); // If it's a data source op, we're done. - if (!defOp || isa(defOp)) + if (!defOp || + isa( + defOp)) return failure(); // At the moment, we only accept ops we know we can swap with cast. diff --git a/lib/Dialect/AIEVec/Utils/Utils.cpp b/lib/Dialect/AIEVec/Utils/Utils.cpp index 7d19c2d4e9..bcfe67b85e 100644 --- a/lib/Dialect/AIEVec/Utils/Utils.cpp +++ b/lib/Dialect/AIEVec/Utils/Utils.cpp @@ -29,7 +29,7 @@ static std::optional getLowerBoundValue(Value idx) { if (auto blkArg = dyn_cast(idx)) { auto parentOp = blkArg.getOwner()->getParentOp(); return TypeSwitch>(parentOp) - .Case([&blkArg](affine::AffineForOp forOp) { + .Case([&blkArg](AffineForOp forOp) { if (forOp.getInductionVar() == blkArg && forOp.hasConstantLowerBound()) return std::optional(forOp.getConstantLowerBound()); @@ -46,7 +46,7 @@ static std::optional getLowerBoundValue(Value idx) { return std::optional( cast(constantOp.getValue()).getInt()); }) - .Case([](auto applyOp) { + .Case([](auto applyOp) { if (applyOp.getAffineMap().getNumResults() == 1) { SmallVector srcIndices; for (auto index : applyOp.getMapOperands()) { diff --git a/lib/Targets/AIETargetXAIEV2.cpp b/lib/Targets/AIETargetXAIEV2.cpp index 63f1de1f23..5c0b3aa7fc 100644 --- a/lib/Targets/AIETargetXAIEV2.cpp +++ b/lib/Targets/AIETargetXAIEV2.cpp @@ -49,7 +49,7 @@ const char xaie_c_file_header[] = R"code( // The following is a wrapper for the common "if(call() != 0) return 1" pattern. // Use this only in functions that return int. If the call this wrapper is used -// on does not succeed, the expanded code will exit out of the function +// on does not succeed, the expanded code will exit out of the function // containing this macro with an error code. #define __mlir_aie_try(x) do { \ AieRC ret = (x); \ @@ -896,7 +896,7 @@ mlir::LogicalResult AIETranslateToXAIEV2(ModuleOp module, raw_ostream &output) { int row = coord.second; auto loc = tileLocStr(col, row); - auto bufferAccessor = [&](std::optional tile, BufferOp buf) { + auto bufferAccessor = [&](Optional tile, BufferOp buf) { // int32_t mlir_aie_read_buffer_a13(int index) { // void mlir_aie_write_buffer_a13(int index, int32_t value) { std::string bufName(buf.name().getValue()); diff --git a/lib/Targets/AIETargets.cpp b/lib/Targets/AIETargets.cpp index b49c3dc0b0..238338b76e 100644 --- a/lib/Targets/AIETargets.cpp +++ b/lib/Targets/AIETargets.cpp @@ -158,7 +158,7 @@ void registerAIETranslations() { output << "// Tile(" << srcCol << ", " << srcRow << ")\n"; output << "// Memory map: name base_address num_bytes\n"; - auto doBuffer = [&](std::optional tile, int offset) { + auto doBuffer = [&](Optional tile, int offset) { if (tiles.count(*tile)) for (auto buf : buffers[tiles[*tile]]) writeBufferMap(output, buf, offset, NL); @@ -307,7 +307,7 @@ SECTIONS *(.rodata*) } > data )THESCRIPT"; - auto doBuffer = [&](std::optional tile, int offset, + auto doBuffer = [&](Optional tile, int offset, std::string dir) { if (tile) { if (tiles.count(*tile)) @@ -420,7 +420,7 @@ SECTIONS << " //Don't put data in code memory\n"; auto srcCoord = std::make_pair(tile.colIndex(), tile.rowIndex()); - auto doBuffer = [&](std::optional tile, int offset, + auto doBuffer = [&](Optional tile, int offset, std::string dir) { if (tile) { if (tiles.count(*tile)) diff --git a/python/aie/compiler/aiecc/main.py b/python/aie/compiler/aiecc/main.py index 673165c2cd..d28a934ce0 100644 --- a/python/aie/compiler/aiecc/main.py +++ b/python/aie/compiler/aiecc/main.py @@ -37,7 +37,7 @@ '--lower-affine', '--convert-math-to-llvm', '--convert-arith-to-llvm', - '--finalize-memref-to-llvm', + '--convert-memref-to-llvm', '--convert-func-to-llvm=use-bare-ptr-memref-call-conv', '--convert-cf-to-llvm', '--canonicalize', @@ -96,7 +96,7 @@ def run_passes(self, pass_pipeline, mlir_module_str, outputfile=None): with Context() as ctx, Location.unknown(): aiedialect.register_dialect(ctx) module = Module.parse(mlir_module_str) - PassManager.parse(pass_pipeline).run(module.operation) + PassManager.parse(pass_pipeline).run(module) mlir_module_str = str(module) if outputfile: with open(outputfile, 'w') as g: diff --git a/python/aie/dialects/AieBinding.td b/python/aie/dialects/AieBinding.td index a916555503..5501e2fda7 100644 --- a/python/aie/dialects/AieBinding.td +++ b/python/aie/dialects/AieBinding.td @@ -9,6 +9,7 @@ #ifndef AIE_BINDING_TD #define AIE_BINDING_TD +include "mlir/Bindings/Python/Attributes.td" include "aie/Dialect/AIE/IR/AIE.td" #endif // AIE_BINDING_TD \ No newline at end of file diff --git a/test/Conversion/AIEVecToLLVM/test-upd_large.mlir b/test/Conversion/AIEVecToLLVM/test-upd_large.mlir index 59932da921..5bd0bb8acf 100644 --- a/test/Conversion/AIEVecToLLVM/test-upd_large.mlir +++ b/test/Conversion/AIEVecToLLVM/test-upd_large.mlir @@ -1,5 +1,4 @@ // RUN: aie-opt %s --convert-aievec-to-llvm | FileCheck %s -// XFAIL: * // Test loads and updates to a vector register module { func.func @test(%arg0: memref<4x32x64xi16>) { diff --git a/test/Conversion/AIEVecToLLVM/test-upd_small.mlir b/test/Conversion/AIEVecToLLVM/test-upd_small.mlir index 789579c65b..c094bd6670 100644 --- a/test/Conversion/AIEVecToLLVM/test-upd_small.mlir +++ b/test/Conversion/AIEVecToLLVM/test-upd_small.mlir @@ -1,5 +1,4 @@ // RUN: aie-opt %s --convert-aievec-to-llvm | FileCheck %s -// XFAIL: * // Test a direct load to vector register that does not actually need an update module { func.func @test(%arg0: memref<4x32x64xi16>) { diff --git a/test/aievec/test_linalg_conv2d.mlir b/test/aievec/test_linalg_conv2d.mlir index 2d75b47cd9..db0fdbe96f 100644 --- a/test/aievec/test_linalg_conv2d.mlir +++ b/test/aievec/test_linalg_conv2d.mlir @@ -178,8 +178,8 @@ func.func @conv_2d(%arg0: memref<10x3x256x256xf32>, %arg1: memref<10x3x3x3xf32>, return } -//CHECK-NEXT: %c1 = arith.constant 1 : index //CHECK-NEXT: %c2 = arith.constant 2 : index +//CHECK-NEXT: %c1 = arith.constant 1 : index //CHECK-NEXT: %c0 = arith.constant 0 : index //CHECK-NEXT: %c0_0 = arith.constant 0 : index //CHECK-NEXT: %c10 = arith.constant 10 : index diff --git a/tools/aie-opt/aie-opt.cpp b/tools/aie-opt/aie-opt.cpp index de378f1737..c20d2d8ba9 100644 --- a/tools/aie-opt/aie-opt.cpp +++ b/tools/aie-opt/aie-opt.cpp @@ -55,6 +55,7 @@ int main(int argc, char **argv) { registry.insert(); registry.insert(); - return failed( - MlirOptMain(argc, argv, "MLIR modular optimizer driver\n", registry)); + return failed(MlirOptMain(argc, argv, "MLIR modular optimizer driver\n", + registry, + /*preloadDialectsInContext=*/false)); } diff --git a/utils/clone-llvm.sh b/utils/clone-llvm.sh index 665bcb8301..c8e748dcb5 100755 --- a/utils/clone-llvm.sh +++ b/utils/clone-llvm.sh @@ -15,7 +15,7 @@ # The LLVM commit to use. # TODO: create a branch or a tag instead, to avoid fetching main and # this commit later. -commithash=11c3b979e6512b00a5bd9c3e0d4ed986cf500630 +commithash=35ca64989a75c93ea7e935ef11c3d1883c21cccd here=$PWD