diff --git a/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp b/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp index 9061b28dfc..94ec83098f 100644 --- a/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp +++ b/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp @@ -145,20 +145,32 @@ struct ConvertSplatTransferReadToBroadcastPattern if (!map.isConstant()) return failure(); - // If the innermost index comes from an `affine.apply` op, take the base - // as the new innermost index for the new `vector.transfer_read`, and the - // offset as the index for the `aievec.broadcast` op. + Value srcMemRef = adaptor.getSource(); SmallVector indices; - indices.append(adaptor.getIndices().begin(), adaptor.getIndices().end()); - Value innerMostIdx = indices[indices.size() - 1]; - Value newIdx = innerMostIdx; + Value newIdx; int64_t offset = 0; - if (auto defOp = innerMostIdx.getDefiningOp()) - if (auto applyOp = dyn_cast(defOp)) + // If it's a zero-rank memory access + if (cast(srcMemRef.getType()).getRank() == 0) { + srcMemRef = rewriter + .create( + readOp.getLoc(), SmallVector({1}), + srcMemRef, SmallVector({})) + .getResult(); + newIdx = rewriter.create(readOp.getLoc(), + rewriter.getIndexAttr(0L)); + indices.push_back(newIdx); + } else { + indices.append(adaptor.getIndices().begin(), adaptor.getIndices().end()); + newIdx = indices[indices.size() - 1]; + // If the innermost index comes from an `affine.apply` op, take the base + // as the new innermost index for the new `vector.transfer_read`, and the + // offset as the index for the `aievec.broadcast` op. 
+ if (auto applyOp = newIdx.getDefiningOp()) if (applyOp.getAffineMap().getNumDims() == 1) { newIdx = applyOp.getMapOperands()[0]; offset = applyOp.getAffineMap().compose(ArrayRef{0})[0]; } + } // XXX: We assume we are reading 1D vectors int64_t vlen = readOp.getVector().getType().getShape()[0]; if (offset >= vlen) { @@ -175,8 +187,8 @@ struct ConvertSplatTransferReadToBroadcastPattern } indices[indices.size() - 1] = newIdx; auto newReadOp = rewriter.create( - readOp.getLoc(), readOp.getVector().getType(), adaptor.getSource(), - indices, adaptor.getPadding()); + readOp.getLoc(), readOp.getVector().getType(), srcMemRef, indices, + adaptor.getPadding()); auto extractOp = rewriter.create( readOp.getLoc(), newReadOp.getResult(), ArrayRef{offset}); rewriter.replaceOpWithNewOp( @@ -232,10 +244,9 @@ struct ComputeExpOpByLUTPattern : public OpConversionPattern { //============================================================================// static void configureCommonAIECanonicalizeLegalizations(ConversionTarget &target) { - target.addLegalDialect(); - target.addLegalDialect(); - target.addLegalDialect(); - target.addLegalDialect(); + target.addLegalDialect(); } static void @@ -325,11 +336,14 @@ struct CanonicalizeVectorForAIEVecPass StringRef getArgument() const final { return "test-canonicalize-vector-for-aievec"; } + StringRef getDescription() const final { return "Canonicalize vector operations for AIEVec conversion"; } + void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); + registry.insert(); } Option aieTarget{ diff --git a/lib/Targets/AIEVecToCpp/TranslateAIEVecToCpp.cpp b/lib/Targets/AIEVecToCpp/TranslateAIEVecToCpp.cpp index f2353e9149..caf9829198 100644 --- a/lib/Targets/AIEVecToCpp/TranslateAIEVecToCpp.cpp +++ b/lib/Targets/AIEVecToCpp/TranslateAIEVecToCpp.cpp @@ -1878,6 +1878,26 @@ static LogicalResult printOperation(CppEmitter &emitter, return success(); } +// Print an expand shape by forwarding the value to the next op 
+static LogicalResult printOperation(CppEmitter &emitter, + memref::ExpandShapeOp expandShapeOp) { + Value source = expandShapeOp.getSrc(); + + // If the memref being outputted is not already emitted, + // error out + if (!emitter.hasValueInScope(source)) + return failure(); + + if (failed(emitter.emitAssignPrefix(*expandShapeOp))) + return failure(); + + raw_indented_ostream &os = emitter.ostream(); + + os << emitter.getOrCreateName(source); + + return success(); +} + static LogicalResult printConstantOp(CppEmitter &emitter, Operation *operation, Attribute value) { OpResult result = operation->getResult(0); @@ -2856,6 +2876,8 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) { // Memref ops. .Case( [&](auto op) { return printOperation(*this, op); }) + .Case( + [&](auto op) { return printOperation(*this, op); }) .Case, %pos : index) -> vecto // CHECK: return %[[AV]] : vector<8xi32> return %v : vector<8xi32> } + +// ----- + +// CHECK-LABEL: func.func @rank_zero_transfer_read( +// CHECK-SAME: %[[MEM:.*]]: memref +func.func @rank_zero_transfer_read(%m : memref) -> vector<16xi16> { + %c0_i16 = arith.constant 0 : i16 + // CHECK-DAG: %[[C0idx:.*]] = arith.constant 0 : index + // CHECK-DAG: %[[C0i16:.*]] = arith.constant 0 : i16 + // CHECK-DAG: %[[EXPMEM:.*]] = memref.expand_shape %[[MEM]] [] : memref into memref<1xi16> + // CHECK: %[[LV:.*]] = vector.transfer_read %[[EXPMEM]][%[[C0idx]]], %[[C0i16]] : memref<1xi16>, vector<16xi16> + // CHECK: %[[E:.*]] = vector.extract %[[LV]][0] : vector<16xi16> + // CHECK: %[[S:.*]] = vector.broadcast %[[E]] : i16 to vector<16xi16> + %v = vector.transfer_read %m[], %c0_i16 {permutation_map = affine_map<()->(0)>} : memref, vector<16xi16> + // CHECK: return %[[S]] : vector<16xi16> + return %v : vector<16xi16> +}