Skip to content

Commit

Permalink
asin, acos.
Browse files Browse the repository at this point in the history
  • Loading branch information
bluescarni committed Aug 31, 2023
1 parent 6aafc94 commit 7c38c7a
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 106 deletions.
108 changes: 2 additions & 106 deletions src/detail/llvm_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3745,59 +3745,7 @@ void llvm_add_inv_kep_E_wrapper(llvm_state &s, llvm::Type *scal_t, std::uint32_t
// Inverse cosine.
//
// Returns the result of llvm_math_cmath(s, "acos", x): the shared helper is
// handed the name of the C math function ("acos") together with the argument
// x, in place of a hand-written per-type dispatch in this function.
//
// NOTE(review): llvm_math_cmath() is defined outside this excerpt; it is
// presumably responsible for validating x and for selecting the scalar or
// vector implementation matching the type of x - confirm against its
// definition.
llvm::Value *llvm_acos(llvm_state &s, llvm::Value *x)
{
    return llvm_math_cmath(s, "acos", x);
}

// Inverse hyperbolic cosine.
Expand Down Expand Up @@ -3861,59 +3809,7 @@ llvm::Value *llvm_acosh(llvm_state &s, llvm::Value *x)
// Inverse sine.
//
// Returns the result of llvm_math_cmath(s, "asin", x): the shared helper is
// handed the name of the C math function ("asin") together with the argument
// x, in place of a hand-written per-type dispatch in this function.
//
// NOTE(review): llvm_math_cmath() is defined outside this excerpt; it is
// presumably responsible for validating x and for selecting the scalar or
// vector implementation matching the type of x - confirm against its
// definition.
llvm::Value *llvm_asin(llvm_state &s, llvm::Value *x)
{
    return llvm_math_cmath(s, "asin", x);
}

// Inverse hyperbolic sine.
Expand Down
2 changes: 2 additions & 0 deletions src/detail/vector_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ auto make_vf_map()
add_vfinfo_sleef(retval, "llvm.pow.f64", "pow", "d", 2);
add_vfinfo_sleef(retval, "sinh", "sinh", "d");
add_vfinfo_sleef(retval, "cosh", "cosh", "d");
add_vfinfo_sleef(retval, "asin", "asin", "d");
add_vfinfo_sleef(retval, "acos", "acos", "d");

#endif

Expand Down
64 changes: 64 additions & 0 deletions test/acos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <type_traits>
#include <vector>

#include <boost/algorithm/string/find_iterator.hpp>
#include <boost/algorithm/string/finder.hpp>
#include <boost/algorithm/string/predicate.hpp>

#include <llvm/Config/llvm-config.h>
Expand Down Expand Up @@ -223,3 +225,65 @@ TEST_CASE("normalise")
REQUIRE(normalise(acos(x)) == acos(x));
REQUIRE(normalise(subs(acos(x), {{x, .1_dbl}})) == acos(.1_dbl));
}

// Check that acos() goes through the vector-function-abi-variant machinery:
// first validate the numerical output of a compiled function, then (where
// supported) inspect the IR for the surviving scalar references.
TEST_CASE("vfabi")
{
    // Pointer type the JIT-compiled function is cast to before being called.
    using cfunc_ptr_t = void (*)(double *, const double *, const double *, const double *);

    llvm_state s;

    auto [a, b] = make_vars("a", "b");

    add_cfunc<double>(s, "cfunc", {acos(a), acos(b)});
    s.compile();

    auto *compiled_fn = reinterpret_cast<cfunc_ptr_t>(s.jit_lookup("cfunc"));

    const std::vector<double> inputs{.1, .2};
    std::vector<double> results(2u, 0.);

    compiled_fn(results.data(), inputs.data(), nullptr, nullptr);

    REQUIRE(results[0] == approximately(std::acos(.1)));
    REQUIRE(results[1] == approximately(std::acos(.2)));

    // NOTE: autovec with external scalar functions seems to work
    // only since LLVM 16.
#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16

    const auto &tf = detail::get_target_features();

    // NOTE: the IR is kept as a mutable string because the find
    // iterator below is instantiated on std::string::iterator.
    auto ir_text = s.get_ir();

    using find_it_t = boost::find_iterator<std::string::iterator>;

    // Count the (case-insensitive) occurrences of "@acos" in the IR.
    auto n_scalar_refs = 0u;
    auto match = boost::make_find_iterator(ir_text, boost::first_finder("@acos", boost::is_iequal()));
    while (match != find_it_t()) {
        ++n_scalar_refs;
        ++match;
    }

    // NOTE: at the moment we have comprehensive coverage of LLVM versions
    // in the CI only for x86_64.
    if (tf.sse2) {
        // NOTE: occurrences of the scalar version:
        // - 2 calls in the strided cfunc,
        // - 1 declaration.
        REQUIRE(n_scalar_refs == 3u);
    }

    if (tf.aarch64) {
        REQUIRE(n_scalar_refs == 3u);
    }

    // NOTE: currently no auto-vectorization happens on ppc64, apparently
    // because of the way the target machine is being set up by orc/lljit
    // (it works fine with the opt tool). When this is resolved, we can
    // test ppc64 too.

    // if (tf.vsx) {
    //     REQUIRE(n_scalar_refs == 3u);
    // }

#endif
}
64 changes: 64 additions & 0 deletions test/asin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <type_traits>
#include <vector>

#include <boost/algorithm/string/find_iterator.hpp>
#include <boost/algorithm/string/finder.hpp>
#include <boost/algorithm/string/predicate.hpp>

#include <llvm/Config/llvm-config.h>
Expand Down Expand Up @@ -223,3 +225,65 @@ TEST_CASE("normalise")
REQUIRE(normalise(asin(x)) == asin(x));
REQUIRE(normalise(subs(asin(x), {{x, .1_dbl}})) == asin(.1_dbl));
}

// Check that asin() goes through the vector-function-abi-variant machinery:
// first validate the numerical output of a compiled function, then (where
// supported) inspect the IR for the surviving scalar references.
TEST_CASE("vfabi")
{
    // Pointer type the JIT-compiled function is cast to before being called.
    using cfunc_ptr_t = void (*)(double *, const double *, const double *, const double *);

    llvm_state s;

    auto [a, b] = make_vars("a", "b");

    add_cfunc<double>(s, "cfunc", {asin(a), asin(b)});
    s.compile();

    auto *compiled_fn = reinterpret_cast<cfunc_ptr_t>(s.jit_lookup("cfunc"));

    const std::vector<double> inputs{.1, .2};
    std::vector<double> results(2u, 0.);

    compiled_fn(results.data(), inputs.data(), nullptr, nullptr);

    REQUIRE(results[0] == approximately(std::asin(.1)));
    REQUIRE(results[1] == approximately(std::asin(.2)));

    // NOTE: autovec with external scalar functions seems to work
    // only since LLVM 16.
#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16

    const auto &tf = detail::get_target_features();

    // NOTE: the IR is kept as a mutable string because the find
    // iterator below is instantiated on std::string::iterator.
    auto ir_text = s.get_ir();

    using find_it_t = boost::find_iterator<std::string::iterator>;

    // Count the (case-insensitive) occurrences of "@asin" in the IR.
    auto n_scalar_refs = 0u;
    auto match = boost::make_find_iterator(ir_text, boost::first_finder("@asin", boost::is_iequal()));
    while (match != find_it_t()) {
        ++n_scalar_refs;
        ++match;
    }

    // NOTE: at the moment we have comprehensive coverage of LLVM versions
    // in the CI only for x86_64.
    if (tf.sse2) {
        // NOTE: occurrences of the scalar version:
        // - 2 calls in the strided cfunc,
        // - 1 declaration.
        REQUIRE(n_scalar_refs == 3u);
    }

    if (tf.aarch64) {
        REQUIRE(n_scalar_refs == 3u);
    }

    // NOTE: currently no auto-vectorization happens on ppc64, apparently
    // because of the way the target machine is being set up by orc/lljit
    // (it works fine with the opt tool). When this is resolved, we can
    // test ppc64 too.

    // if (tf.vsx) {
    //     REQUIRE(n_scalar_refs == 3u);
    // }

#endif
}

0 comments on commit 7c38c7a

Please sign in to comment.