Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Xtensa] Fix fp16 conversion #92

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,11 +379,22 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &tm,
// Needed so that we don't try to implement f128 constant loads using
// a load-and-extend of an f80 constant (in cases where the constant
// would fit in an f80).
for (MVT VT : MVT::fp_valuetypes())
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}

setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

// Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);

// Implement custom stack allocations
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
Expand Down
166 changes: 166 additions & 0 deletions llvm/test/CodeGen/Xtensa/fp16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=xtensa -mcpu=esp32 < %s | FileCheck --check-prefix=CHECK-ESP32 %s
; RUN: llc -mtriple=xtensa -mcpu=esp32s3 < %s | FileCheck --check-prefix=CHECK-ESP32S3 %s
; RUN: llc -mtriple=xtensa -mcpu=esp32s2 < %s | FileCheck --check-prefix=CHECK-ESP32S2 %s

target datalayout = "e-m:e-p:32:32-v1:8:8-i64:64-i128:128-n32"
target triple = "xtensa"

; 16-bit globals used by the test. @foo loads @x and @y and stores its result
; back to @x. @z is not referenced by any function visible in this file.
@x = global i16 12902
@y = global i16 0
@z = common global i16 0

; Loads the i16 globals @x and @y, converts each to float with
; llvm.convert.from.fp16.f32, adds the two floats, converts the sum back to
; i16 with llvm.convert.to.fp16.f32 and stores it to @x.
;
; In the expectations below, the esp32 and esp32s3 runs contain wfr/add.s/rfr
; around the addition, while the esp32s2 run contains only indirect calls
; (callx8) and an `and` in that position. The call targets are loaded from
; .LCPI labels, so their names are not visible in these checks.
define void @foo() nounwind {
; CHECK-ESP32-LABEL: foo:
; CHECK-ESP32: entry a1, 32
; CHECK-ESP32-NEXT: l32r a6, .LCPI0_0
; CHECK-ESP32-NEXT: l16ui a10, a6, 0
; CHECK-ESP32-NEXT: l32r a5, .LCPI0_1
; CHECK-ESP32-NEXT: callx8 a5
; CHECK-ESP32-NEXT: mov.n a7, a10
; CHECK-ESP32-NEXT: l32r a8, .LCPI0_2
; CHECK-ESP32-NEXT: l16ui a10, a8, 0
; CHECK-ESP32-NEXT: callx8 a5
; CHECK-ESP32-NEXT: wfr f8, a10
; CHECK-ESP32-NEXT: wfr f9, a7
; CHECK-ESP32-NEXT: add.s f8, f9, f8
; CHECK-ESP32-NEXT: rfr a10, f8
; CHECK-ESP32-NEXT: l32r a8, .LCPI0_3
; CHECK-ESP32-NEXT: callx8 a8
; CHECK-ESP32-NEXT: s16i a10, a6, 0
; CHECK-ESP32-NEXT: retw.n
;
; CHECK-ESP32S3-LABEL: foo:
; CHECK-ESP32S3: entry a1, 32
; CHECK-ESP32S3-NEXT: l32r a6, .LCPI0_0
; CHECK-ESP32S3-NEXT: l16ui a10, a6, 0
; CHECK-ESP32S3-NEXT: l32r a5, .LCPI0_1
; CHECK-ESP32S3-NEXT: callx8 a5
; CHECK-ESP32S3-NEXT: mov.n a7, a10
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI0_2
; CHECK-ESP32S3-NEXT: l16ui a10, a8, 0
; CHECK-ESP32S3-NEXT: callx8 a5
; CHECK-ESP32S3-NEXT: wfr f8, a10
; CHECK-ESP32S3-NEXT: wfr f9, a7
; CHECK-ESP32S3-NEXT: add.s f8, f9, f8
; CHECK-ESP32S3-NEXT: rfr a10, f8
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI0_3
; CHECK-ESP32S3-NEXT: callx8 a8
; CHECK-ESP32S3-NEXT: s16i a10, a6, 0
; CHECK-ESP32S3-NEXT: retw.n
;
; CHECK-ESP32S2-LABEL: foo:
; CHECK-ESP32S2: entry a1, 32
; CHECK-ESP32S2-NEXT: l32r a6, .LCPI0_0
; CHECK-ESP32S2-NEXT: l16ui a10, a6, 0
; CHECK-ESP32S2-NEXT: l32r a5, .LCPI0_1
; CHECK-ESP32S2-NEXT: callx8 a5
; CHECK-ESP32S2-NEXT: mov.n a7, a10
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_2
; CHECK-ESP32S2-NEXT: l16ui a10, a8, 0
; CHECK-ESP32S2-NEXT: callx8 a5
; CHECK-ESP32S2-NEXT: mov.n a11, a10
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_3
; CHECK-ESP32S2-NEXT: mov.n a10, a7
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: l32r a7, .LCPI0_4
; CHECK-ESP32S2-NEXT: callx8 a7
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_5
; CHECK-ESP32S2-NEXT: and a10, a10, a8
; CHECK-ESP32S2-NEXT: callx8 a5
; CHECK-ESP32S2-NEXT: callx8 a7
; CHECK-ESP32S2-NEXT: s16i a10, a6, 0
; CHECK-ESP32S2-NEXT: retw.n
entry:
; Read both half-precision operands as raw 16-bit integers.
%0 = load i16, ptr @x, align 2
%1 = load i16, ptr @y, align 2
; Widen each operand to float, add, then narrow the sum back to 16 bits.
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
%3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
%4 = fadd float %2, %3
%5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
; The result overwrites @x.
store i16 %5, ptr @x, align 2
ret void
}

; Converts the i16 argument %in to double via llvm.convert.from.fp16.f64 and
; returns it.
;
; All three runs expect two indirect calls (callx8) with the result moved from
; a10/a11 into a2/a3. The esp32s2 run additionally expects an `and` of a2 with
; a value loaded from a .LCPI label before the first call.
define double @test_from_fp16(i16 %in) {
; CHECK-ESP32-LABEL: test_from_fp16:
; CHECK-ESP32: entry a1, 32
; CHECK-ESP32-NEXT: l32r a8, .LCPI1_0
; CHECK-ESP32-NEXT: mov.n a10, a2
; CHECK-ESP32-NEXT: callx8 a8
; CHECK-ESP32-NEXT: l32r a8, .LCPI1_1
; CHECK-ESP32-NEXT: callx8 a8
; CHECK-ESP32-NEXT: mov.n a2, a10
; CHECK-ESP32-NEXT: mov.n a3, a11
; CHECK-ESP32-NEXT: retw.n
;
; CHECK-ESP32S3-LABEL: test_from_fp16:
; CHECK-ESP32S3: entry a1, 32
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI1_0
; CHECK-ESP32S3-NEXT: mov.n a10, a2
; CHECK-ESP32S3-NEXT: callx8 a8
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI1_1
; CHECK-ESP32S3-NEXT: callx8 a8
; CHECK-ESP32S3-NEXT: mov.n a2, a10
; CHECK-ESP32S3-NEXT: mov.n a3, a11
; CHECK-ESP32S3-NEXT: retw.n
;
; CHECK-ESP32S2-LABEL: test_from_fp16:
; CHECK-ESP32S2: entry a1, 32
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_0
; CHECK-ESP32S2-NEXT: and a10, a2, a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_1
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_2
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: mov.n a2, a10
; CHECK-ESP32S2-NEXT: mov.n a3, a11
; CHECK-ESP32S2-NEXT: retw.n
%val = call double @llvm.convert.from.fp16.f64(i16 %in)
ret double %val
}

; Converts the double argument %in to a 16-bit value via
; llvm.convert.to.fp16.f64 and returns it as i16.
;
; The esp32 and esp32s3 runs expect a single indirect call (callx8) followed by
; an `and` of the result with a value loaded from a .LCPI label. The esp32s2
; run expects three indirect calls, with the `and` placed after the first one.
define i16 @test_to_fp16(double %in) {
; CHECK-ESP32-LABEL: test_to_fp16:
; CHECK-ESP32: entry a1, 32
; CHECK-ESP32-NEXT: l32r a8, .LCPI2_0
; CHECK-ESP32-NEXT: mov.n a10, a2
; CHECK-ESP32-NEXT: mov.n a11, a3
; CHECK-ESP32-NEXT: callx8 a8
; CHECK-ESP32-NEXT: l32r a8, .LCPI2_1
; CHECK-ESP32-NEXT: and a2, a10, a8
; CHECK-ESP32-NEXT: retw.n
;
; CHECK-ESP32S3-LABEL: test_to_fp16:
; CHECK-ESP32S3: entry a1, 32
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI2_0
; CHECK-ESP32S3-NEXT: mov.n a10, a2
; CHECK-ESP32S3-NEXT: mov.n a11, a3
; CHECK-ESP32S3-NEXT: callx8 a8
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI2_1
; CHECK-ESP32S3-NEXT: and a2, a10, a8
; CHECK-ESP32S3-NEXT: retw.n
;
; CHECK-ESP32S2-LABEL: test_to_fp16:
; CHECK-ESP32S2: entry a1, 32
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_0
; CHECK-ESP32S2-NEXT: mov.n a10, a2
; CHECK-ESP32S2-NEXT: mov.n a11, a3
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_1
; CHECK-ESP32S2-NEXT: and a10, a10, a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_2
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_3
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: mov.n a2, a10
; CHECK-ESP32S2-NEXT: retw.n
%val = call i16 @llvm.convert.to.fp16.f64(double %in)
ret i16 %val
}

; Declarations of the half-precision conversion intrinsics called by the
; functions above: i16 -> float/double ...
declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone

; ... and float/double -> i16.
declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone
Loading