Skip to content

Commit

Permalink
[VPlan] Convert induction increment check to be VPlan-based.
Browse files Browse the repository at this point in the history
Check the VPlan directly to determine if a VPValue is an optimizable IV
or IV use instead of checking the underlying IR instructions.

Split off from llvm#112147. This
refactoring enables moving IV end value creation from the legacy
fixupIVUsers to a VPlan-based transform.

There is one case we now won't optimize: IVs with subtracts and
non-constant steps. But as this is a minor optimization and doesn't
impact correctness, the benefits of performing the check in VPlan should
outweigh the missed case.
  • Loading branch information
fhahn committed Jan 5, 2025
1 parent ba93ecc commit df4a615
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 19 deletions.
84 changes: 67 additions & 17 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8953,14 +8953,73 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
}
}

/// Decide whether \p VPV is an induction, or a use of an induction, that
/// qualifies as optimizable. This holds when \p VPV is
///  * a wide induction recipe that is not truncated, or
///  * a two-operand recipe that advances a wide induction by the induction's
///    own step.
/// Returns false for values without a defining recipe.
static bool isOptimizableIVOrUse(VPValue *VPV) {
  VPRecipeBase *DefR = VPV->getDefiningRecipe();
  if (!DefR)
    return false;

  // VPV is itself a wide induction: pointer inductions always qualify,
  // integer/FP inductions qualify only when no truncate is attached.
  if (auto *DirectIV = dyn_cast<VPWidenInductionRecipe>(DefR)) {
    if (isa<VPWidenPointerInductionRecipe>(DirectIV))
      return true;
    return !cast<VPWidenIntOrFpInductionRecipe>(DirectIV)->getTruncInst();
  }

  // Otherwise VPV may be an induction increment, which must be binary.
  if (DefR->getNumOperands() != 2)
    return false;

  // Look for the wide induction among the operands, preferring operand 0.
  VPWidenInductionRecipe *WideIV = nullptr;
  for (unsigned OpIdx = 0; OpIdx != 2 && !WideIV; ++OpIdx)
    WideIV = dyn_cast<VPWidenInductionRecipe>(DefR->getOperand(OpIdx));
  if (!WideIV)
    return false;

  using namespace VPlanPatternMatch;
  const auto &ID = WideIV->getInductionDescriptor();
  VPValue *IVStep = WideIV->getStepValue();

  // The increment has to combine the induction with exactly its step, using
  // the opcode recorded in the induction descriptor.
  switch (ID.getInductionOpcode()) {
  case Instruction::Add:
    // Integer addition: operands may appear in either order.
    return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
                                                   m_Specific(IVStep)));
  case Instruction::FAdd:
    // Floating-point addition: operands may appear in either order.
    return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
                                                    m_Specific(IVStep)));
  case Instruction::FSub:
    // Floating-point subtraction: the induction must be the left operand.
    return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
                                                  m_Specific(IVStep)));
  case Instruction::Sub: {
    // IVStep holds the negated step of the subtraction, so the subtrahend
    // must be a constant equal to -1 * IVStep. Both values have to be
    // live-ins for their constants to be inspected.
    VPValue *Subtrahend;
    if (!match(VPV,
               m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Subtrahend))))
      return false;
    if (!Subtrahend->isLiveIn() || !IVStep->isLiveIn())
      return false;
    auto *SubtrahendCI = dyn_cast<ConstantInt>(Subtrahend->getLiveInIRValue());
    auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
    if (!SubtrahendCI || !IVStepCI)
      return false;
    return SubtrahendCI->getValue() == (-1 * IVStepCI->getValue());
  }
  default:
    // Any other opcode is only accepted for pointer inductions, whose
    // increment must be a GEP of the induction by its step.
    if (ID.getKind() != InductionDescriptor::IK_PtrInduction)
      return false;
    return match(VPV,
                 m_GetElementPtr(m_Specific(WideIV), m_Specific(IVStep)));
  }
  llvm_unreachable("should have been covered by switch above");
}

// Collect VPIRInstructions for phis in the exit blocks that are modeled
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
// modeled explicitly yet and won't be included. Those are un-truncated
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
// increments.
static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
static SetVector<VPIRInstruction *>
collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
VPlan &Plan) {
auto *MiddleVPBB = Plan.getMiddleBlock();
SetVector<VPIRInstruction *> ExitUsersToFix;
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
Expand All @@ -8985,18 +9044,9 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
// Exit values for inductions are computed and updated outside of VPlan
// and independent of induction recipes.
// TODO: Compute induction exit values in VPlan.
if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
!cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
isa<VPWidenPointerInductionRecipe>(V) ||
(isa<Instruction>(IncomingValue) &&
OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
any_of(IncomingValue->users(), [&Inductions](User *U) {
auto *P = dyn_cast<PHINode>(U);
return P && Inductions.contains(P);
}))) {
if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
continue;
}
if (isOptimizableIVOrUse(V) &&
ExitVPBB->getSinglePredecessor() == MiddleVPBB)
continue;
ExitUsersToFix.insert(ExitIRI);
ExitIRI->addOperand(V);
}
Expand Down Expand Up @@ -9331,8 +9381,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
}
addScalarResumePhis(RecipeBuilder, *Plan);
SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
SetVector<VPIRInstruction *> ExitUsersToFix =
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
reportVectorizationFailure(
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2095,6 +2095,15 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
}

static inline bool classof(const VPValue *V) {
auto *R = V->getDefiningRecipe();
return R && classof(R);
}

static inline bool classof(const VPHeaderPHIRecipe *R) {
return classof(static_cast<const VPRecipeBase *>(R));
}

virtual void execute(VPTransformState &State) override = 0;

/// Returns the step value of the induction.
Expand Down
21 changes: 20 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ struct MatchRecipeAndOpcode<Opcode, RecipeTy> {
if constexpr (std::is_same<RecipeTy, VPScalarIVStepsRecipe>::value ||
std::is_same<RecipeTy, VPCanonicalIVPHIRecipe>::value ||
std::is_same<RecipeTy, VPWidenSelectRecipe>::value ||
std::is_same<RecipeTy, VPDerivedIVRecipe>::value)
std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
return DefR;
else
return DefR && DefR->getOpcode() == Opcode;
Expand Down Expand Up @@ -309,6 +310,12 @@ m_Binary(const Op0_t &Op0, const Op1_t &Op1) {
return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>(Op0, Op1);
}

/// Build a binary matcher for \p Opcode with the commutative flag set, using
/// \p Op0 and \p Op1 as the operand sub-matchers.
template <unsigned Opcode, typename Op0_t, typename Op1_t>
inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, true>
m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) {
  using CommutativeMatcher =
      AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, /*Commutative=*/true>;
  return CommutativeMatcher(Op0, Op1);
}

template <typename Op0_t, typename Op1_t>
inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Mul>
m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
Expand Down Expand Up @@ -339,6 +346,18 @@ m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
return m_BinaryOr<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
}

/// Matcher alias for recipes with opcode Instruction::GetElementPtr, built on
/// BinaryRecipe_match. The candidate recipe kinds are VPWidenRecipe,
/// VPReplicateRecipe, VPWidenGEPRecipe and VPInstruction; \p Op0_t and
/// \p Op1_t are the sub-matcher types for the operands.
/// NOTE(review): the `false` argument is presumably the commutative flag, so
/// operands would be matched in order only -- confirm against
/// BinaryRecipe_match.
template <typename Op0_t, typename Op1_t>
using GEPLikeRecipe_match =
BinaryRecipe_match<Op0_t, Op1_t, Instruction::GetElementPtr, false,
VPWidenRecipe, VPReplicateRecipe, VPWidenGEPRecipe,
VPInstruction>;

/// Build a GEP-like matcher from the operand sub-matchers \p Op0 and \p Op1.
template <typename Op0_t, typename Op1_t>
inline GEPLikeRecipe_match<Op0_t, Op1_t> m_GetElementPtr(const Op0_t &Op0,
                                                         const Op1_t &Op1) {
  using GEPMatcher = GEPLikeRecipe_match<Op0_t, Op1_t>;
  return GEPMatcher(Op0, Op1);
}

template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
using AllTernaryRecipe_match =
Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, false,
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/Transforms/LoopVectorize/X86/induction-step.ll
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
Expand All @@ -131,7 +132,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) {
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i16 [[SUB_LCSSA]]
;
entry:
Expand Down

0 comments on commit df4a615

Please sign in to comment.