diff --git a/doc/changelog.rst b/doc/changelog.rst index ea606b729..0afa6679a 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -24,9 +24,10 @@ Changes - **BREAKING**: the minimum supported LLVM version is now 11 (`#342 <https://github.com/bluescarni/heyoka/pull/342>`__). This is a :ref:`breaking change `. -- The LLVM SLP vectorizer is now enabled when JIT compiling - with optimisations +- The LLVM SLP vectorizer can now be enabled (`#341 <https://github.com/bluescarni/heyoka/pull/341>`__). + This feature is opt-in due to the fact that enabling it + can considerably increase JIT compilation times. - The optimisation level for an ``llvm_state`` is now clamped within the ``[0, 3]`` range (`#340 <https://github.com/bluescarni/heyoka/pull/340>`__). diff --git a/include/heyoka/llvm_state.hpp b/include/heyoka/llvm_state.hpp index 51f00dfa9..ad6e08901 100644 --- a/include/heyoka/llvm_state.hpp +++ b/include/heyoka/llvm_state.hpp @@ -85,6 +85,7 @@ IGOR_MAKE_NAMED_ARGUMENT(fast_math); // can get rid of this in the future when AVX512 implementations improve // and LLVM learns to discriminate good and bad implementations. IGOR_MAKE_NAMED_ARGUMENT(force_avx512); +IGOR_MAKE_NAMED_ARGUMENT(slp_vectorize); } // namespace kw @@ -134,6 +135,7 @@ class HEYOKA_DLL_PUBLIC llvm_state std::string m_bc_snapshot; bool m_fast_math; bool m_force_avx512; + bool m_slp_vectorize; std::string m_module_name; // Serialization. @@ -205,10 +207,19 @@ class HEYOKA_DLL_PUBLIC llvm_state } }(); - return std::tuple{std::move(mod_name), opt_level, fmath, force_avx512}; + // Enable SLP vectorization (defaults to false). + auto slp_vectorize = [&p]() -> bool { + if constexpr (p.has(kw::slp_vectorize)) { + return std::forward<decltype(p(kw::slp_vectorize))>(p(kw::slp_vectorize)); + } else { + return false; + } + }(); + + return std::tuple{std::move(mod_name), opt_level, fmath, force_avx512, slp_vectorize}; } } - explicit llvm_state(std::tuple<std::string, unsigned, bool, bool> &&); + explicit llvm_state(std::tuple<std::string, unsigned, bool, bool, bool> &&); // Small shared helper to setup the math flags in the builder at the // end of a constructor. 
@@ -254,6 +265,8 @@ class HEYOKA_DLL_PUBLIC llvm_state [[nodiscard]] bool force_avx512() const; [[nodiscard]] unsigned get_opt_level() const; void set_opt_level(unsigned); + [[nodiscard]] bool get_slp_vectorize() const; + void set_slp_vectorize(bool); [[nodiscard]] std::string get_ir() const; [[nodiscard]] std::string get_bc() const; @@ -298,7 +311,7 @@ HEYOKA_END_NAMESPACE // - version 1: got rid of the inline_functions setting; // - version 2: added the force_avx512 setting; // - version 3: added the bitcode snapshot, simplified -// compilation logic. +// compilation logic, slp_vectorize flag. BOOST_CLASS_VERSION(heyoka::llvm_state, 3) #endif diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index 799df606c..702e9d7d0 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -569,9 +569,9 @@ auto llvm_state_bc_to_module(const std::string &module_name, const std::string & } // namespace detail -llvm_state::llvm_state(std::tuple<std::string, unsigned, bool, bool> &&tup) +llvm_state::llvm_state(std::tuple<std::string, unsigned, bool, bool, bool> &&tup) : m_jitter(std::make_unique<jit>()), m_opt_level(std::get<1>(tup)), m_fast_math(std::get<2>(tup)), - m_force_avx512(std::get<3>(tup)), m_module_name(std::move(std::get<0>(tup))) + m_force_avx512(std::get<3>(tup)), m_slp_vectorize(std::get<4>(tup)), m_module_name(std::move(std::get<0>(tup))) { // Create the module. m_module = std::make_unique<llvm::Module>(m_module_name, context()); @@ -595,7 +595,7 @@ llvm_state::llvm_state(const llvm_state &other) // - creating a new jit, // - copying over the options from other. : m_jitter(std::make_unique<jit>()), m_opt_level(other.m_opt_level), m_fast_math(other.m_fast_math), - m_force_avx512(other.m_force_avx512), m_module_name(other.m_module_name) + m_force_avx512(other.m_force_avx512), m_slp_vectorize(other.m_slp_vectorize), m_module_name(other.m_module_name) { if (other.is_compiled()) { // 'other' was compiled. 
@@ -652,6 +652,7 @@ llvm_state &llvm_state::operator=(llvm_state &&other) noexcept m_bc_snapshot = std::move(other.m_bc_snapshot); m_fast_math = other.m_fast_math; m_force_avx512 = other.m_force_avx512; + m_slp_vectorize = other.m_slp_vectorize; m_module_name = std::move(other.m_module_name); } @@ -687,6 +688,7 @@ void llvm_state::save_impl(Archive &ar, unsigned) const ar << m_opt_level; ar << m_fast_math; ar << m_force_avx512; + ar << m_slp_vectorize; ar << m_module_name; // Store the bitcode. @@ -757,6 +759,10 @@ void llvm_state::load_impl(Archive &ar, unsigned version) bool force_avx512{}; ar >> force_avx512; + // NOLINTNEXTLINE(misc-const-correctness) + bool slp_vectorize{}; + ar >> slp_vectorize; + // NOLINTNEXTLINE(misc-const-correctness) std::string module_name; ar >> module_name; @@ -783,6 +789,7 @@ void llvm_state::load_impl(Archive &ar, unsigned version) m_opt_level = opt_level; m_fast_math = fast_math; m_force_avx512 = force_avx512; + m_slp_vectorize = slp_vectorize; m_module_name = module_name; // Reset module and builder to the def-cted state. @@ -889,6 +896,16 @@ bool llvm_state::force_avx512() const return m_force_avx512; } +bool llvm_state::get_slp_vectorize() const +{ + return m_slp_vectorize; +} + +void llvm_state::set_slp_vectorize(bool flag) +{ + m_slp_vectorize = flag; +} + unsigned llvm_state::clamp_opt_level(unsigned opt_level) { return std::min(opt_level, 3u); @@ -1038,7 +1055,7 @@ void llvm_state::optimise() // Create the new pass manager builder, passing // the native target machine from the JIT class. - // NOTE: we turn manually on the SLP vectoriser here, which is off + // NOTE: if requested, we turn manually on the SLP vectoriser here, which is off // by default. Not sure why it is off, the LLVM docs imply this // is on by default at nonzero optimisation levels for clang and opt. 
// NOTE: the reason for this inconsistency is that opt uses PB.parsePassPipeline() @@ -1050,7 +1067,7 @@ void llvm_state::optimise() // switching to this alternative way of setting up the optimisation pipeline // in the future. llvm::PipelineTuningOptions pto; - pto.SLPVectorization = true; + pto.SLPVectorization = m_slp_vectorize; llvm::PassBuilder PB(m_jitter->m_tm.get(), pto); // Register all the basic analyses with the managers. @@ -1108,10 +1125,10 @@ void llvm_state::optimise() pm_builder.OptLevel = m_opt_level; // Enable function inlining. pm_builder.Inliner = llvm::createFunctionInliningPass(m_opt_level, 0, false); - // NOTE: we turn manually on the SLP vectoriser here, which is off + // NOTE: if requested, we turn manually on the SLP vectoriser here, which is off // by default. Not sure why it is off, the LLVM docs imply this // is on by default at nonzero optimisation levels for clang and opt. - pm_builder.SLPVectorize = true; + pm_builder.SLPVectorize = m_slp_vectorize; m_jitter->m_tm->adjustPassManager(pm_builder); @@ -1217,10 +1234,11 @@ void llvm_state::compile() // Fetch the bitcode *before* optimisation. auto orig_bc = get_bc(); - // Combine m_opt_level and m_force_avx512 into a single value, - // as they both affect codegen. + // Combine m_opt_level, m_force_avx512 and m_slp_vectorize into a single value, + // as they all affect codegen. assert(m_opt_level <= 3u); - const auto olevel = m_opt_level + (static_cast<unsigned>(m_force_avx512) << 2); + const auto olevel = m_opt_level + (static_cast<unsigned>(m_force_avx512) << 2) + + (static_cast<unsigned>(m_slp_vectorize) << 3); if (auto cached_data = detail::llvm_state_mem_cache_lookup(orig_bc, olevel)) { // Cache hit. 
@@ -1361,7 +1379,7 @@ const std::string &llvm_state::module_name() const llvm_state llvm_state::make_similar() const { return llvm_state(kw::mname = m_module_name, kw::opt_level = m_opt_level, kw::fast_math = m_fast_math, - kw::force_avx512 = m_force_avx512); + kw::force_avx512 = m_force_avx512, kw::slp_vectorize = m_slp_vectorize); } std::ostream &operator<<(std::ostream &os, const llvm_state &s) @@ -1373,6 +1391,7 @@ std::ostream &operator<<(std::ostream &os, const llvm_state &s) oss << "Compiled : " << s.is_compiled() << '\n'; oss << "Fast math : " << s.m_fast_math << '\n'; oss << "Force AVX512 : " << s.m_force_avx512 << '\n'; + oss << "SLP vectorization : " << s.m_slp_vectorize << '\n'; oss << "Optimisation level: " << s.m_opt_level << '\n'; oss << "Data layout : " << s.m_jitter->m_lljit->getDataLayout().getStringRepresentation() << '\n'; oss << "Target triple : " << s.m_jitter->get_target_triple().str() << '\n'; diff --git a/test/acos.cpp b/test/acos.cpp index ee4e0a02f..f85cfb17c 100644 --- a/test/acos.cpp +++ b/test/acos.cpp @@ -229,7 +229,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/acosh.cpp b/test/acosh.cpp index 4af510c7d..5681dcf0d 100644 --- a/test/acosh.cpp +++ b/test/acosh.cpp @@ -229,7 +229,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/asin.cpp b/test/asin.cpp index 8719933a0..fbb382999 100644 --- a/test/asin.cpp +++ b/test/asin.cpp @@ -229,7 +229,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. 
TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/asinh.cpp b/test/asinh.cpp index 9a9bba472..9b32cfa8a 100644 --- a/test/asinh.cpp +++ b/test/asinh.cpp @@ -229,7 +229,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/atan.cpp b/test/atan.cpp index 342923db7..7a3142fac 100644 --- a/test/atan.cpp +++ b/test/atan.cpp @@ -223,7 +223,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/atan2.cpp b/test/atan2.cpp index 11eed9341..27174c0cc 100644 --- a/test/atan2.cpp +++ b/test/atan2.cpp @@ -327,7 +327,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/atanh.cpp b/test/atanh.cpp index 1e1cead38..783a85699 100644 --- a/test/atanh.cpp +++ b/test/atanh.cpp @@ -223,7 +223,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/cos.cpp b/test/cos.cpp index eda18c300..add97924b 100644 --- a/test/cos.cpp +++ b/test/cos.cpp @@ -246,7 +246,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. 
TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/cosh.cpp b/test/cosh.cpp index 722d3c8fc..515f16ef4 100644 --- a/test/cosh.cpp +++ b/test/cosh.cpp @@ -223,7 +223,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/erf.cpp b/test/erf.cpp index d2160db4f..7d1c33f12 100644 --- a/test/erf.cpp +++ b/test/erf.cpp @@ -230,7 +230,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/exp.cpp b/test/exp.cpp index 9f93615e4..856b735ed 100644 --- a/test/exp.cpp +++ b/test/exp.cpp @@ -237,7 +237,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/llvm_state.cpp b/test/llvm_state.cpp index b32b9e1ee..e3ebbbdbe 100644 --- a/test/llvm_state.cpp +++ b/test/llvm_state.cpp @@ -78,6 +78,7 @@ TEST_CASE("empty state") REQUIRE(!s.get_bc().empty()); REQUIRE(!s.get_ir().empty()); REQUIRE(s.get_opt_level() == 3u); + REQUIRE(!s.get_slp_vectorize()); // Print also some info on the FP types. 
std::cout << "Double digits : " << std::numeric_limits::digits << '\n'; @@ -114,6 +115,7 @@ TEST_CASE("copy semantics") REQUIRE(s.get_opt_level() == 2u); REQUIRE(s.fast_math()); REQUIRE(!s.is_compiled()); + REQUIRE(!s.get_slp_vectorize()); const auto orig_ir = s.get_ir(); const auto orig_bc = s.get_bc(); @@ -124,6 +126,7 @@ TEST_CASE("copy semantics") REQUIRE(s2.get_opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(!s2.is_compiled()); + REQUIRE(!s2.get_slp_vectorize()); REQUIRE(s2.get_ir() == orig_ir); REQUIRE(s2.get_bc() == orig_bc); @@ -144,12 +147,14 @@ TEST_CASE("copy semantics") { std::vector jet{2, 3, 0, 0}; - llvm_state s{kw::mname = "sample state", kw::opt_level = 2u, kw::fast_math = true}; + llvm_state s{kw::mname = "sample state", kw::opt_level = 2u, kw::fast_math = true, kw::slp_vectorize = true}; taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); s.compile(); + REQUIRE(s.get_slp_vectorize()); + auto jptr = reinterpret_cast(s.jit_lookup("jet")); const auto orig_ir = s.get_ir(); @@ -161,6 +166,7 @@ TEST_CASE("copy semantics") REQUIRE(s2.get_opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(s2.is_compiled()); + REQUIRE(s2.get_slp_vectorize()); REQUIRE(s2.get_ir() == orig_ir); REQUIRE(s2.get_bc() == orig_bc); @@ -216,7 +222,8 @@ TEST_CASE("s11n") oa << s; } - s = llvm_state{kw::mname = "sample state", kw::opt_level = 2u, kw::fast_math = true, kw::force_avx512 = true}; + s = llvm_state{kw::mname = "sample state", kw::opt_level = 2u, kw::fast_math = true, kw::force_avx512 = true, + kw::slp_vectorize = true}; { boost::archive::binary_iarchive ia(ss); @@ -231,13 +238,14 @@ TEST_CASE("s11n") REQUIRE(s.get_opt_level() == 3u); REQUIRE(s.fast_math() == false); REQUIRE(s.force_avx512() == false); + REQUIRE(!s.get_slp_vectorize()); } // Compiled state. 
{ std::stringstream ss; - llvm_state s{kw::mname = "foo"}; + llvm_state s{kw::mname = "foo", kw::slp_vectorize = true}; taylor_add_jet(s, "jet", {-1_dbl, x + y}, 1, 1, true, false); @@ -266,6 +274,7 @@ TEST_CASE("s11n") REQUIRE(s.module_name() == "foo"); REQUIRE(s.get_opt_level() == 3u); REQUIRE(s.fast_math() == false); + REQUIRE(s.get_slp_vectorize()); auto jptr = reinterpret_cast(s.jit_lookup("jet")); @@ -285,7 +294,8 @@ TEST_CASE("make_similar") { auto [x, y] = make_vars("x", "y"); - llvm_state s{kw::mname = "sample state", kw::opt_level = 2u, kw::fast_math = true, kw::force_avx512 = true}; + llvm_state s{kw::mname = "sample state", kw::opt_level = 2u, kw::fast_math = true, kw::force_avx512 = true, + kw::slp_vectorize = true}; taylor_add_jet(s, "jet", {-1_dbl, x + y}, 1, 1, true, false); s.compile(); @@ -295,6 +305,7 @@ TEST_CASE("make_similar") REQUIRE(s.fast_math()); REQUIRE(s.is_compiled()); REQUIRE(s.force_avx512()); + REQUIRE(s.get_slp_vectorize()); auto s2 = s.make_similar(); @@ -304,6 +315,7 @@ TEST_CASE("make_similar") REQUIRE(s2.force_avx512()); REQUIRE(!s2.is_compiled()); REQUIRE(s.get_ir() != s2.get_ir()); + REQUIRE(s2.get_slp_vectorize()); } TEST_CASE("force_avx512") @@ -347,6 +359,53 @@ TEST_CASE("force_avx512") } } +TEST_CASE("slp_vectorize") +{ + { + llvm_state s; + REQUIRE(!s.get_slp_vectorize()); + + llvm_state s2(s); + REQUIRE(!s2.get_slp_vectorize()); + + llvm_state s3(std::move(s2)); + REQUIRE(!s3.get_slp_vectorize()); + + llvm_state s4; + s4 = s3; + REQUIRE(!s4.get_slp_vectorize()); + + llvm_state s5; + s5 = std::move(s4); + REQUIRE(!s5.get_slp_vectorize()); + + s5.set_slp_vectorize(true); + REQUIRE(s5.get_slp_vectorize()); + } + + { + llvm_state s{kw::slp_vectorize = true}; + REQUIRE(s.get_slp_vectorize()); + + llvm_state s2(s); + REQUIRE(s2.get_slp_vectorize()); + + llvm_state s3(std::move(s2)); + REQUIRE(s3.get_slp_vectorize()); + + llvm_state s4; + s4 = s3; + REQUIRE(s4.get_slp_vectorize()); + + llvm_state s5; + s5 = std::move(s4); + 
REQUIRE(s5.get_slp_vectorize()); + + s5.set_slp_vectorize(false); + REQUIRE(!s5.get_slp_vectorize()); + } +} + TEST_CASE("existing trigger") { using Catch::Matchers::Message; diff --git a/test/llvm_state_mem_cache.cpp b/test/llvm_state_mem_cache.cpp index 02d8525a6..97960422b 100644 --- a/test/llvm_state_mem_cache.cpp +++ b/test/llvm_state_mem_cache.cpp @@ -121,3 +121,22 @@ TEST_CASE("force_avx512 test") ta = taylor_adaptive{model::pendulum(), {1., 0.}, kw::tol = 1e-11, kw::force_avx512 = true}; REQUIRE(llvm_state::get_memcache_size() > size11); } + +// Same test for the slp_vectorize option. +TEST_CASE("slp_vectorize test") +{ + llvm_state::clear_memcache(); + llvm_state::set_memcache_limit(2048ull * 1024u * 1024u); + + auto ta = taylor_adaptive{model::pendulum(), {1., 0.}, kw::tol = 1e-11}; + const auto size11 = llvm_state::get_memcache_size(); + + ta = taylor_adaptive{model::pendulum(), {1., 0.}, kw::tol = 1e-11, kw::slp_vectorize = true}; + REQUIRE(llvm_state::get_memcache_size() > size11); + + const auto new_size = llvm_state::get_memcache_size(); + + ta = taylor_adaptive{ + model::pendulum(), {1., 0.}, kw::tol = 1e-11, kw::slp_vectorize = true, kw::force_avx512 = true}; + REQUIRE(llvm_state::get_memcache_size() > new_size); +} diff --git a/test/log.cpp b/test/log.cpp index 79308ca72..60b5770e0 100644 --- a/test/log.cpp +++ b/test/log.cpp @@ -213,7 +213,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/opt_checks.cpp b/test/opt_checks.cpp index 364a05848..cd09b55e3 100644 --- a/test/opt_checks.cpp +++ b/test/opt_checks.cpp @@ -50,7 +50,7 @@ TEST_CASE("function inlining") // the timestep size in an integrator. 
TEST_CASE("pow vect") { - auto ta = taylor_adaptive{model::pendulum(), std::vector(2u, 0.)}; + auto ta = taylor_adaptive{model::pendulum(), std::vector(2u, 0.), kw::slp_vectorize = true}; #if defined(HEYOKA_WITH_SLEEF) diff --git a/test/pow.cpp b/test/pow.cpp index 39ba64d80..d2488ed85 100644 --- a/test/pow.cpp +++ b/test/pow.cpp @@ -421,7 +421,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/sin.cpp b/test/sin.cpp index b67f3f376..82724f72a 100644 --- a/test/sin.cpp +++ b/test/sin.cpp @@ -231,7 +231,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); @@ -293,7 +293,7 @@ TEST_CASE("vfabi") // Some more extensive testing specific to x86, only for this function. auto [c, d, e] = make_vars("c", "d", "e"); - llvm_state s2; + llvm_state s2{kw::slp_vectorize = true}; add_cfunc(s2, "cfunc1", {sin(a), sin(b), sin(c), sin(d)}); add_cfunc(s2, "cfunc2", {sin(a), sin(b), sin(c), sin(d), sin(e)}); @@ -346,7 +346,7 @@ TEST_CASE("vfabi") // Check that the autovec works also on batch sizes which do not correspond // exactly to an available vector width. - llvm_state s3; + llvm_state s3{kw::slp_vectorize = true}; add_cfunc(s3, "cfunc", {sin(a)}, kw::batch_size = 3u); diff --git a/test/sinh.cpp b/test/sinh.cpp index c7c72a5d7..b4e9dd134 100644 --- a/test/sinh.cpp +++ b/test/sinh.cpp @@ -223,7 +223,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); @@ -282,7 +282,7 @@ TEST_CASE("vfabi") // Some more extensive testing specific to x86, only for this function. 
auto [c, d, e] = make_vars("c", "d", "e"); - llvm_state s2; + llvm_state s2{kw::slp_vectorize = true}; add_cfunc(s2, "cfunc1", {sinh(a), sinh(b), sinh(c), sinh(d)}); add_cfunc(s2, "cfunc2", {sinh(a), sinh(b), sinh(c), sinh(d), sinh(e)}); @@ -331,7 +331,7 @@ TEST_CASE("vfabi") // Check that the autovec works also on batch sizes which do not correspond // exactly to an available vector width. - llvm_state s3; + llvm_state s3{kw::slp_vectorize = true}; add_cfunc(s3, "cfunc", {sinh(a)}, kw::batch_size = 3u); diff --git a/test/sqrt.cpp b/test/sqrt.cpp index 377504788..80c69319b 100644 --- a/test/sqrt.cpp +++ b/test/sqrt.cpp @@ -209,7 +209,7 @@ TEST_CASE("cfunc_mp") // Test to check vectorisation. TEST_CASE("slp vect") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/tan.cpp b/test/tan.cpp index 02204cf67..250f03181 100644 --- a/test/tan.cpp +++ b/test/tan.cpp @@ -222,7 +222,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b"); diff --git a/test/tanh.cpp b/test/tanh.cpp index 11f89ed78..7b9bcbb85 100644 --- a/test/tanh.cpp +++ b/test/tanh.cpp @@ -223,7 +223,7 @@ TEST_CASE("normalise") // Test to check vectorisation via the vector-function-abi-variant machinery. TEST_CASE("vfabi") { - llvm_state s; + llvm_state s{kw::slp_vectorize = true}; auto [a, b] = make_vars("a", "b");