Skip to content

Commit

Permalink
fix IsNaN codegen for clang < 18.1. Refs numpy/numpy/issues/27313
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 670571080
  • Loading branch information
jan-wassenberg authored and copybara-github committed Sep 3, 2024
1 parent 6b98ac2 commit 710bcb2
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions hwy/ops/arm_neon-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -5335,10 +5335,62 @@ HWY_API Vec128<int32_t, N> NearestInt(const Vec128<float, N> v) {
#endif

// ------------------------------ Floating-point classification

#if !HWY_COMPILER_CLANG || HWY_COMPILER_CLANG > 1801
// Returns a mask that is set in every lane where `v` compares unequal to
// itself. For floating-point lanes that is exactly the NaN lanes, because NaN
// is the only value for which `x != x` holds.
template <typename T, size_t N>
HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
  const Mask128<T, N> differs_from_self = (v != v);
  return differs_from_self;
}
#else
// Clang up to 18.1 generates less efficient code than the expected FCMEQ, see
// https://github.com/numpy/numpy/issues/27313 and
// https://github.com/numpy/numpy/pull/22954/files and
// https://github.com/llvm/llvm-project/issues/59855

#if HWY_HAVE_FLOAT16
// Returns a mask that is set in every float16 lane of `v` holding NaN
// (overload whose asm operates on eight 16-bit lanes).
//
// FCMEQ of a register with itself sets a lane to all-ones when the lane is
// equal to itself, i.e. when it is NOT NaN. The raw result is therefore the
// complement of what IsNaN must return and has to be inverted.
template <typename T, size_t N, HWY_IF_F16(T), HWY_IF_V_SIZE(T, N, 16)>
HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
  Mask128<T, N> ret;
  // ret := (v == v), lane-wise; false only for NaN lanes.
  asm("fcmeq %0.8h, %1.8h, %1.8h" : "=w"(ret.raw) : "w"(v.raw));
  // Invert so that NaN lanes are the ones that are set.
  return Not(ret);
}
// Returns a mask that is set in every float16 lane of `v` holding NaN
// (overload whose asm operates on four 16-bit lanes).
//
// FCMEQ of a register with itself sets a lane to all-ones when the lane is
// equal to itself, i.e. when it is NOT NaN. The raw result is therefore the
// complement of what IsNaN must return and has to be inverted.
template <typename T, size_t N, HWY_IF_F16(T), HWY_IF_V_SIZE_LE(T, N, 8)>
HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
  Mask128<T, N> ret;
  // ret := (v == v), lane-wise; false only for NaN lanes.
  asm("fcmeq %0.4h, %1.4h, %1.4h" : "=w"(ret.raw) : "w"(v.raw));
  // Invert so that NaN lanes are the ones that are set.
  return Not(ret);
}
#endif // HWY_HAVE_FLOAT16

// Returns a mask that is set in every float32 lane of `v` holding NaN
// (overload whose asm operates on four 32-bit lanes).
//
// FCMEQ of a register with itself sets a lane to all-ones when the lane is
// equal to itself, i.e. when it is NOT NaN. The raw result is therefore the
// complement of what IsNaN must return and has to be inverted.
//
// NOTE(review): the `fcmeq %0.4s` syntax is A64-only; confirm the enclosing
// preprocessor condition never selects this path when targeting 32-bit Arm.
template <typename T, size_t N, HWY_IF_F32(T), HWY_IF_V_SIZE(T, N, 16)>
HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
  Mask128<T, N> ret;
  // ret := (v == v), lane-wise; false only for NaN lanes.
  asm("fcmeq %0.4s, %1.4s, %1.4s" : "=w"(ret.raw) : "w"(v.raw));
  // Invert so that NaN lanes are the ones that are set.
  return Not(ret);
}
// Returns a mask that is set in every float32 lane of `v` holding NaN
// (overload whose asm operates on two 32-bit lanes).
//
// FCMEQ of a register with itself sets a lane to all-ones when the lane is
// equal to itself, i.e. when it is NOT NaN. The raw result is therefore the
// complement of what IsNaN must return and has to be inverted.
template <typename T, size_t N, HWY_IF_F32(T), HWY_IF_V_SIZE_LE(T, N, 8)>
HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
  Mask128<T, N> ret;
  // ret := (v == v), lane-wise; false only for NaN lanes.
  asm("fcmeq %0.2s, %1.2s, %1.2s" : "=w"(ret.raw) : "w"(v.raw));
  // Invert so that NaN lanes are the ones that are set.
  return Not(ret);
}

#if HWY_HAVE_FLOAT64
// Returns a mask that is set in every float64 lane of `v` holding NaN
// (overload whose asm operates on two 64-bit lanes).
//
// FCMEQ of a register with itself sets a lane to all-ones when the lane is
// equal to itself, i.e. when it is NOT NaN. The raw result is therefore the
// complement of what IsNaN must return and has to be inverted.
template <typename T, size_t N, HWY_IF_F64(T), HWY_IF_V_SIZE(T, N, 16)>
HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
  Mask128<T, N> ret;
  // ret := (v == v), lane-wise; false only for NaN lanes.
  asm("fcmeq %0.2d, %1.2d, %1.2d" : "=w"(ret.raw) : "w"(v.raw));
  // Invert so that NaN lanes are the ones that are set.
  return Not(ret);
}
// Returns a mask that is set where the float64 value in `v` is NaN (overload
// whose asm uses the scalar double-precision form of FCMEQ).
//
// FCMEQ of a register with itself yields all-ones when the value is equal to
// itself, i.e. when it is NOT NaN. The raw result is therefore the complement
// of what IsNaN must return and has to be inverted.
template <typename T, size_t N, HWY_IF_F64(T), HWY_IF_V_SIZE_LE(T, N, 8)>
HWY_API Mask128<T, N> IsNaN(const Vec128<T, N> v) {
  Mask128<T, N> ret;
  // ret := (v == v); false only when the value is NaN.
  asm("fcmeq %d0, %d1, %d1" : "=w"(ret.raw) : "w"(v.raw));
  // Invert so that a NaN input produces a set mask.
  return Not(ret);
}
#endif // HWY_HAVE_FLOAT64

#endif // HWY_COMPILER_CLANG

// ================================================== SWIZZLE

Expand Down

0 comments on commit 710bcb2

Please sign in to comment.