Skip to content

Commit

Permalink
[GlobalISel][AArch64][AMDGPU] Expand FPOWI into series of multiplicat…
Browse files Browse the repository at this point in the history
…ion (#95217)

SelectionDAG already converts FPOWI into a series of optimized multiplications, 
this patch introduces the same optimization into GlobalISel.
  • Loading branch information
isuckatcs authored Jun 28, 2024
1 parent 5db3579 commit 937d79b
Show file tree
Hide file tree
Showing 7 changed files with 413 additions and 300 deletions.
7 changes: 7 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,13 @@ class CombinerHelper {
/// By default, it erases the instruction def'd on \p MO from the function.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo);

/// Match G_FPOWI with a constant exponent \p Exponent if the target considers
/// it beneficial to expand it into a series of multiplications.
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent);

/// Expands G_FPOWI with the constant exponent \p Exponent into a series of
/// multiplications. If the exponent is negative, the product is additionally
/// inverted with a division (1.0 / product). A zero exponent is replaced by
/// the floating-point constant 1.0. The original instruction \p MI is removed.
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent);

/// Combine insert vector element OOB.
bool matchInsertVectorElementOOB(MachineInstr &MI, BuildFnTy &MatchInfo);

Expand Down
9 changes: 8 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1636,6 +1636,13 @@ def sub_of_vscale : GICombineRule<
[{ return Helper.matchSubOfVScale(${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;

// Expand G_FPOWI whose exponent operand is defined by a G_CONSTANT. The
// sign-extended constant is handed to the match helper, which decides whether
// the expansion should happen, and then to the apply helper, which rewrites
// the root instruction.
def expand_const_fpowi : GICombineRule<
(defs root:$root),
(match (G_CONSTANT $int, $imm),
(G_FPOWI $dst, $float, $int):$root,
[{ return Helper.matchFPowIExpansion(*${root}, ${imm}.getCImm()->getSExtValue()); }]),
(apply [{ Helper.applyExpandFPowI(*${root}, ${imm}.getCImm()->getSExtValue()); }])>;

// match_extract_of_element and insert_vector_elt_oob must be the first!
def vector_ops_combines: GICombineGroup<[
match_extract_of_element_undef_vector,
Expand Down Expand Up @@ -1786,7 +1793,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
// expand_const_fpowi is grouped here because it only fires when the G_FPOWI
// exponent is a G_CONSTANT.
def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
overlapping_and, mulo_by_2, mulo_by_0,
adde_to_addo,
combine_minmax_nan, expand_const_fpowi]>;

def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
Expand Down
48 changes: 48 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7349,6 +7349,54 @@ void CombinerHelper::applyBuildFnMO(const MachineOperand &MO,
Root->eraseFromParent();
}

/// Decide whether a G_FPOWI with the constant exponent \p Exponent should be
/// expanded inline. The decision is delegated to the target lowering hook,
/// which is also told whether the function containing \p MI is optimized for
/// size.
bool CombinerHelper::matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) {
  const auto &ParentFn = MI.getMF()->getFunction();
  const auto &TLI = getTargetLowering();
  return TLI.isBeneficialToExpandPowI(Exponent, ParentFn.hasOptSize());
}

/// Replace a G_FPOWI whose exponent is the compile-time constant \p Exponent
/// with an explicit sequence of G_FMUL instructions.
///
/// \param MI        The G_FPOWI instruction to expand; it is erased.
/// \param Exponent  The sign-extended constant exponent.
///
/// - Exponent == 0: the destination is defined as the constant 1.0.
/// - Exponent  > 0: the destination receives Base^Exponent, computed by
///                  square-and-multiply.
/// - Exponent  < 0: the destination receives 1.0 / Base^|Exponent|; the
///                  G_FDIV carries the flags of the original instruction.
void CombinerHelper::applyExpandFPowI(MachineInstr &MI, int64_t Exponent) {
  auto [Dst, Base] = MI.getFirst2Regs();
  LLT Ty = MRI.getType(Dst);

  if (Exponent == 0) {
    Builder.buildFConstant(Dst, 1.0);
    // Erase the instruction instead of merely unlinking it, so that it is
    // actually deleted, consistent with the general path below.
    MI.eraseFromParent();
    return;
  }

  // Compute |Exponent| in unsigned arithmetic: negating INT64_MIN as a signed
  // value would overflow, whereas unsigned wrap-around is well defined.
  uint64_t ExpVal = static_cast<uint64_t>(Exponent);
  if (Exponent < 0)
    ExpVal = 0 - ExpVal;

  // We use the simple binary decomposition method from SelectionDAG ExpandPowI
  // to generate the multiply sequence. There are more optimal ways to do this
  // (for example, powi(x,15) generates one more multiply than it should), but
  // this has the benefit of being both really simple and much better than a
  // libcall.
  std::optional<SrcOp> Res;
  SrcOp CurSquare = Base;
  while (true) {
    // Fold the current power of two into the result if its bit is set.
    if (ExpVal & 1) {
      if (!Res)
        Res = CurSquare;
      else
        Res = Builder.buildFMul(Ty, *Res, CurSquare);
    }

    ExpVal >>= 1;
    // Stop before squaring again once every bit has been consumed; the extra
    // square would be dead code.
    if (ExpVal == 0)
      break;

    CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
  }

  // If the original exponent was negative, invert the result, producing
  // 1/(x*x*x).
  if (Exponent < 0)
    Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
                            MI.getFlags());

  // ExpVal was non-zero on loop entry, so Res is guaranteed to hold a value.
  Builder.buildCopy(Dst, *Res);
  MI.eraseFromParent();
}

bool CombinerHelper::matchSextOfTrunc(const MachineOperand &MO,
BuildFnTy &MatchInfo) {
GSext *Sext = cast<GSext>(getDefIgnoringCopies(MO.getReg(), MRI));
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7140,8 +7140,6 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
return UnableToLegalize;
}

// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-fpowi-optsize.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
; RUN: llc < %s -mtriple=aarch64-unknown-unknown -global-isel | FileCheck %s

; powi with constant exponent 15 in an optsize function: the expected output
; keeps the call to __powidf2 instead of expanding into multiplications.
define double @pow_optsize(double %x) nounwind optsize {
; CHECK-LABEL: pow_optsize:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w0, #15 // =0xf
; CHECK-NEXT: b __powidf2
entry:
%0 = call double @llvm.powi.f64.i32(double %x, i32 15)
ret double %0
}

; powi with constant exponent 16 in an optsize function: the expected output
; is four successive squarings (x^2, x^4, x^8, x^16) and no libcall.
define double @pow_optsize_expand(double %x) nounwind optsize {
; CHECK-LABEL: pow_optsize_expand:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fmul d0, d0, d0
; CHECK-NEXT: fmul d0, d0, d0
; CHECK-NEXT: fmul d0, d0, d0
; CHECK-NEXT: fmul d0, d0, d0
; CHECK-NEXT: ret
entry:
%0 = call double @llvm.powi.f64.i32(double %x, i32 16)
ret double %0
}
124 changes: 124 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-fpowi.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s

# s64 G_FPOWI with constant exponent 0: expected to fold to G_FCONSTANT 1.0.
---
name: fpowi_s64_zero
body: |
bb.0:
liveins: $d0, $w0
; CHECK-LABEL: name: fpowi_s64_zero
; CHECK: liveins: $d0, $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; CHECK-NEXT: $d0 = COPY [[C]](s64)
%0:_(s64) = COPY $d0
%1:_(s32) = COPY $w0
%2:_(s32) = G_CONSTANT i32 0
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
$d0 = COPY %3(s64)
...

# s32 G_FPOWI with constant exponent 0: expected to fold to G_FCONSTANT 1.0.
---
name: fpowi_s32_zero
body: |
bb.0:
liveins: $d0, $w0
; CHECK-LABEL: name: fpowi_s32_zero
; CHECK: liveins: $d0, $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: $s0 = COPY [[C]](s32)
%0:_(s32) = COPY $s0
%1:_(s32) = COPY $w0
%2:_(s32) = G_CONSTANT i32 0
%3:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
$s0 = COPY %3(s32)
...

# G_FPOWI with constant exponent 5: expected to become three G_FMULs
# (x^2, x^4, then x * x^4).
---
name: fpowi_positive
body: |
bb.0:
liveins: $d0, $w0
; CHECK-LABEL: name: fpowi_positive
; CHECK: liveins: $d0, $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY]]
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FMUL]], [[FMUL]]
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMUL1]]
; CHECK-NEXT: $d0 = COPY [[FMUL2]](s64)
%0:_(s64) = COPY $d0
%1:_(s32) = COPY $w0
%2:_(s32) = G_CONSTANT i32 5
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
$d0 = COPY %3(s64)
...

# s64 G_FPOWI with constant exponent -5: expected to produce the same multiply
# chain as for +5, followed by G_FDIV 1.0 / product. Only the G_FDIV carries
# the fast-math flags of the original G_FPOWI.
---
name: fpowi_s64_negative
body: |
bb.0:
liveins: $d0, $w0
; CHECK-LABEL: name: fpowi_s64_negative
; CHECK: liveins: $d0, $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[COPY]]
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[FMUL]], [[FMUL]]
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMUL1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FDIV [[C]], [[FMUL2]]
; CHECK-NEXT: $d0 = COPY [[FDIV]](s64)
%0:_(s64) = COPY $d0
%1:_(s32) = COPY $w0
%2:_(s32) = G_CONSTANT i32 -5
%3:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
$d0 = COPY %3(s64)
...

# s32 G_FPOWI with constant exponent -5: expected to produce the multiply
# chain followed by G_FDIV 1.0 / product, with the fast-math flags on the
# G_FDIV only.
---
name: fpowi_s32_negative
body: |
bb.0:
liveins: $d0, $w0
; CHECK-LABEL: name: fpowi_s32_negative
; CHECK: liveins: $d0, $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY]]
; CHECK-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FMUL]], [[FMUL]]
; CHECK-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[FMUL1]]
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FDIV [[C]], [[FMUL2]]
; CHECK-NEXT: $s0 = COPY [[FDIV]](s32)
%0:_(s32) = COPY $s0
%1:_(s32) = COPY $w0
%2:_(s32) = G_CONSTANT i32 -5
%3:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %2(s32)
$s0 = COPY %3(s32)
...

# G_FPOWI whose exponent is not a constant (it is copied from $w0): the
# combine must not fire and the G_FPOWI is expected to be left untouched.
# NOTE(review): the test name suggests it is later lowered to a libcall;
# that happens outside this pass.
---
name: fpowi_libcall
body: |
bb.0:
liveins: $d0, $w0
; CHECK-LABEL: name: fpowi_libcall
; CHECK: liveins: $d0, $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[FPOWI:%[0-9]+]]:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI [[COPY]], [[COPY1]](s32)
; CHECK-NEXT: $d0 = COPY [[FPOWI]](s64)
%0:_(s64) = COPY $d0
%1:_(s32) = COPY $w0
%2:_(s64) = nnan ninf nsz arcp contract afn reassoc G_FPOWI %0, %1(s32)
$d0 = COPY %2(s64)
...
Loading

0 comments on commit 937d79b

Please sign in to comment.