Skip to content

Commit

Permalink
[Xtensa] Add fp16 conversion support
Browse files Browse the repository at this point in the history
  • Loading branch information
gerekon committed Apr 2, 2024
1 parent e6e27d9 commit 9a0907f
Show file tree
Hide file tree
Showing 2 changed files with 178 additions and 1 deletion.
13 changes: 12 additions & 1 deletion llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,11 +379,22 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &tm,
// Needed so that we don't try to implement f128 constant loads using
// a load-and-extend of a f80 constant (in cases where the constant
// would fit in an f80).
for (MVT VT : MVT::fp_valuetypes())
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}

setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);

// Floating-point truncation and stores need to be done separately.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);

// Implement custom stack allocations
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
Expand Down
166 changes: 166 additions & 0 deletions llvm/test/CodeGen/Xtensa/fp16.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=xtensa -mcpu=esp32 < %s | FileCheck --check-prefix=CHECK-ESP32 %s
; RUN: llc -mtriple=xtensa -mcpu=esp32s3 < %s | FileCheck --check-prefix=CHECK-ESP32S3 %s
; RUN: llc -mtriple=xtensa -mcpu=esp32s2 < %s | FileCheck --check-prefix=CHECK-ESP32S2 %s

target datalayout = "e-m:e-p:32:32-v1:8:8-i64:64-i128:128-n32"
target triple = "xtensa"

@x = global i16 12902
@y = global i16 0
@z = common global i16 0

; Half-precision add through the conversion intrinsics: loads the i16 globals
; @x and @y, widens each to float with llvm.convert.from.fp16.f32, adds the
; two floats, narrows the sum with llvm.convert.to.fp16.f32 and stores the
; resulting i16 back to @x.
; For esp32 and esp32s3 the expected code performs the add with a native add.s
; and contains three callx8 calls, which line up with the three conversions.
; For esp32s2 the expected code has no add.s and contains additional callx8
; calls. The call targets are loaded from literal-pool entries (.LCPI0_*), so
; the callee names are not visible in this test.
define void @foo() nounwind {
; CHECK-ESP32-LABEL: foo:
; CHECK-ESP32: entry a1, 32
; CHECK-ESP32-NEXT: l32r a6, .LCPI0_0
; CHECK-ESP32-NEXT: l16ui a10, a6, 0
; CHECK-ESP32-NEXT: l32r a5, .LCPI0_1
; CHECK-ESP32-NEXT: callx8 a5
; CHECK-ESP32-NEXT: mov.n a7, a10
; CHECK-ESP32-NEXT: l32r a8, .LCPI0_2
; CHECK-ESP32-NEXT: l16ui a10, a8, 0
; CHECK-ESP32-NEXT: callx8 a5
; CHECK-ESP32-NEXT: wfr f8, a10
; CHECK-ESP32-NEXT: wfr f9, a7
; CHECK-ESP32-NEXT: add.s f8, f9, f8
; CHECK-ESP32-NEXT: rfr a10, f8
; CHECK-ESP32-NEXT: l32r a8, .LCPI0_3
; CHECK-ESP32-NEXT: callx8 a8
; CHECK-ESP32-NEXT: s16i a10, a6, 0
; CHECK-ESP32-NEXT: retw.n
;
; CHECK-ESP32S3-LABEL: foo:
; CHECK-ESP32S3: entry a1, 32
; CHECK-ESP32S3-NEXT: l32r a6, .LCPI0_0
; CHECK-ESP32S3-NEXT: l16ui a10, a6, 0
; CHECK-ESP32S3-NEXT: l32r a5, .LCPI0_1
; CHECK-ESP32S3-NEXT: callx8 a5
; CHECK-ESP32S3-NEXT: mov.n a7, a10
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI0_2
; CHECK-ESP32S3-NEXT: l16ui a10, a8, 0
; CHECK-ESP32S3-NEXT: callx8 a5
; CHECK-ESP32S3-NEXT: wfr f8, a10
; CHECK-ESP32S3-NEXT: wfr f9, a7
; CHECK-ESP32S3-NEXT: add.s f8, f9, f8
; CHECK-ESP32S3-NEXT: rfr a10, f8
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI0_3
; CHECK-ESP32S3-NEXT: callx8 a8
; CHECK-ESP32S3-NEXT: s16i a10, a6, 0
; CHECK-ESP32S3-NEXT: retw.n
;
; CHECK-ESP32S2-LABEL: foo:
; CHECK-ESP32S2: entry a1, 32
; CHECK-ESP32S2-NEXT: l32r a6, .LCPI0_0
; CHECK-ESP32S2-NEXT: l16ui a10, a6, 0
; CHECK-ESP32S2-NEXT: l32r a5, .LCPI0_1
; CHECK-ESP32S2-NEXT: callx8 a5
; CHECK-ESP32S2-NEXT: mov.n a7, a10
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_2
; CHECK-ESP32S2-NEXT: l16ui a10, a8, 0
; CHECK-ESP32S2-NEXT: callx8 a5
; CHECK-ESP32S2-NEXT: mov.n a11, a10
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_3
; CHECK-ESP32S2-NEXT: mov.n a10, a7
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: l32r a7, .LCPI0_4
; CHECK-ESP32S2-NEXT: callx8 a7
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI0_5
; CHECK-ESP32S2-NEXT: and a10, a10, a8
; CHECK-ESP32S2-NEXT: callx8 a5
; CHECK-ESP32S2-NEXT: callx8 a7
; CHECK-ESP32S2-NEXT: s16i a10, a6, 0
; CHECK-ESP32S2-NEXT: retw.n
entry:
; Read the two half-precision bit patterns stored as i16 globals.
%0 = load i16, ptr @x, align 2
%1 = load i16, ptr @y, align 2
; Widen both operands from fp16 to float before the add.
%2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
%3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
%4 = fadd float %2, %3
; Narrow the float sum back to fp16 bits and write it over @x.
%5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
store i16 %5, ptr @x, align 2
ret void
}

; Converts an i16 holding fp16 bits to double via llvm.convert.from.fp16.f64.
; Every CPU variant is expected to emit two consecutive callx8 calls through
; literal-pool entries (.LCPI1_*) and then copy a10/a11 into a2/a3 before
; returning. The esp32s2 variant additionally masks the incoming a2 with a
; literal-pool constant before the first call.
; NOTE(review): the two calls presumably correspond to a half->float step
; followed by a float->double step; the callee names are not visible here.
define double @test_from_fp16(i16 %in) {
; CHECK-ESP32-LABEL: test_from_fp16:
; CHECK-ESP32: entry a1, 32
; CHECK-ESP32-NEXT: l32r a8, .LCPI1_0
; CHECK-ESP32-NEXT: mov.n a10, a2
; CHECK-ESP32-NEXT: callx8 a8
; CHECK-ESP32-NEXT: l32r a8, .LCPI1_1
; CHECK-ESP32-NEXT: callx8 a8
; CHECK-ESP32-NEXT: mov.n a2, a10
; CHECK-ESP32-NEXT: mov.n a3, a11
; CHECK-ESP32-NEXT: retw.n
;
; CHECK-ESP32S3-LABEL: test_from_fp16:
; CHECK-ESP32S3: entry a1, 32
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI1_0
; CHECK-ESP32S3-NEXT: mov.n a10, a2
; CHECK-ESP32S3-NEXT: callx8 a8
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI1_1
; CHECK-ESP32S3-NEXT: callx8 a8
; CHECK-ESP32S3-NEXT: mov.n a2, a10
; CHECK-ESP32S3-NEXT: mov.n a3, a11
; CHECK-ESP32S3-NEXT: retw.n
;
; CHECK-ESP32S2-LABEL: test_from_fp16:
; CHECK-ESP32S2: entry a1, 32
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_0
; CHECK-ESP32S2-NEXT: and a10, a2, a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_1
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI1_2
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: mov.n a2, a10
; CHECK-ESP32S2-NEXT: mov.n a3, a11
; CHECK-ESP32S2-NEXT: retw.n
%val = call double @llvm.convert.from.fp16.f64(i16 %in)
ret double %val
}

; Converts a double to fp16 bits (returned as i16) via
; llvm.convert.to.fp16.f64. The double argument is passed to the first call in
; a10/a11 (copied from a2/a3).
; For esp32 and esp32s3 the expected code is a single callx8 followed by an
; `and` of the result with a literal-pool constant, written to a2.
; For esp32s2 the expected code makes the same first call and mask, then two
; further callx8 calls before moving a10 into a2.
; The call targets come from literal-pool entries (.LCPI2_*), so the callee
; names are not visible in this test.
define i16 @test_to_fp16(double %in) {
; CHECK-ESP32-LABEL: test_to_fp16:
; CHECK-ESP32: entry a1, 32
; CHECK-ESP32-NEXT: l32r a8, .LCPI2_0
; CHECK-ESP32-NEXT: mov.n a10, a2
; CHECK-ESP32-NEXT: mov.n a11, a3
; CHECK-ESP32-NEXT: callx8 a8
; CHECK-ESP32-NEXT: l32r a8, .LCPI2_1
; CHECK-ESP32-NEXT: and a2, a10, a8
; CHECK-ESP32-NEXT: retw.n
;
; CHECK-ESP32S3-LABEL: test_to_fp16:
; CHECK-ESP32S3: entry a1, 32
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI2_0
; CHECK-ESP32S3-NEXT: mov.n a10, a2
; CHECK-ESP32S3-NEXT: mov.n a11, a3
; CHECK-ESP32S3-NEXT: callx8 a8
; CHECK-ESP32S3-NEXT: l32r a8, .LCPI2_1
; CHECK-ESP32S3-NEXT: and a2, a10, a8
; CHECK-ESP32S3-NEXT: retw.n
;
; CHECK-ESP32S2-LABEL: test_to_fp16:
; CHECK-ESP32S2: entry a1, 32
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_0
; CHECK-ESP32S2-NEXT: mov.n a10, a2
; CHECK-ESP32S2-NEXT: mov.n a11, a3
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_1
; CHECK-ESP32S2-NEXT: and a10, a10, a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_2
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: l32r a8, .LCPI2_3
; CHECK-ESP32S2-NEXT: callx8 a8
; CHECK-ESP32S2-NEXT: mov.n a2, a10
; CHECK-ESP32S2-NEXT: retw.n
%val = call i16 @llvm.convert.to.fp16.f64(double %in)
ret i16 %val
}

declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone

declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
declare i16 @llvm.convert.to.fp16.f64(double) nounwind readnone

0 comments on commit 9a0907f

Please sign in to comment.