Skip to content

Commit

Permalink
Merge pull request #344 from bluescarni/pr/slp_opt
Browse files Browse the repository at this point in the history
Make SLP vectorization optional, turn off by default
  • Loading branch information
bluescarni authored Sep 4, 2023
2 parents f2ccb29 + d288f0c commit 6989f64
Show file tree
Hide file tree
Showing 24 changed files with 154 additions and 43 deletions.
5 changes: 3 additions & 2 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ Changes
- **BREAKING**: the minimum supported LLVM version is now 11
(`#342 <https://github.com/bluescarni/heyoka/pull/342>`__).
This is a :ref:`breaking change <bchanges_2_0_0>`.
- The LLVM SLP vectorizer is now enabled when JIT compiling
with optimisations
- The LLVM SLP vectorizer can now be enabled
(`#341 <https://github.com/bluescarni/heyoka/pull/341>`__).
This feature is opt-in because enabling it
can considerably increase JIT compilation times.
- The optimisation level for an ``llvm_state`` is now clamped
within the ``[0, 3]`` range
(`#340 <https://github.com/bluescarni/heyoka/pull/340>`__).
Expand Down
19 changes: 16 additions & 3 deletions include/heyoka/llvm_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ IGOR_MAKE_NAMED_ARGUMENT(fast_math);
// can get rid of this in the future when AVX512 implementations improve
// and LLVM learns to discriminate good and bad implementations.
IGOR_MAKE_NAMED_ARGUMENT(force_avx512);
IGOR_MAKE_NAMED_ARGUMENT(slp_vectorize);

} // namespace kw

Expand Down Expand Up @@ -134,6 +135,7 @@ class HEYOKA_DLL_PUBLIC llvm_state
std::string m_bc_snapshot;
bool m_fast_math;
bool m_force_avx512;
bool m_slp_vectorize;
std::string m_module_name;

// Serialization.
Expand Down Expand Up @@ -205,10 +207,19 @@ class HEYOKA_DLL_PUBLIC llvm_state
}
}();

return std::tuple{std::move(mod_name), opt_level, fmath, force_avx512};
// Enable SLP vectorization (defaults to false).
auto slp_vectorize = [&p]() -> bool {
if constexpr (p.has(kw::slp_vectorize)) {
return std::forward<decltype(p(kw::slp_vectorize))>(p(kw::slp_vectorize));
} else {
return false;
}
}();

return std::tuple{std::move(mod_name), opt_level, fmath, force_avx512, slp_vectorize};
}
}
explicit llvm_state(std::tuple<std::string, unsigned, bool, bool> &&);
explicit llvm_state(std::tuple<std::string, unsigned, bool, bool, bool> &&);

// Small shared helper to setup the math flags in the builder at the
// end of a constructor.
Expand Down Expand Up @@ -254,6 +265,8 @@ class HEYOKA_DLL_PUBLIC llvm_state
[[nodiscard]] bool force_avx512() const;
[[nodiscard]] unsigned get_opt_level() const;
void set_opt_level(unsigned);
[[nodiscard]] bool get_slp_vectorize() const;
void set_slp_vectorize(bool);

[[nodiscard]] std::string get_ir() const;
[[nodiscard]] std::string get_bc() const;
Expand Down Expand Up @@ -298,7 +311,7 @@ HEYOKA_END_NAMESPACE
// - version 1: got rid of the inline_functions setting;
// - version 2: added the force_avx512 setting;
// - version 3: added the bitcode snapshot, simplified
// compilation logic.
// compilation logic, slp_vectorize flag.
BOOST_CLASS_VERSION(heyoka::llvm_state, 3)

#endif
41 changes: 30 additions & 11 deletions src/llvm_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -569,9 +569,9 @@ auto llvm_state_bc_to_module(const std::string &module_name, const std::string &

} // namespace detail

llvm_state::llvm_state(std::tuple<std::string, unsigned, bool, bool> &&tup)
llvm_state::llvm_state(std::tuple<std::string, unsigned, bool, bool, bool> &&tup)
: m_jitter(std::make_unique<jit>()), m_opt_level(std::get<1>(tup)), m_fast_math(std::get<2>(tup)),
m_force_avx512(std::get<3>(tup)), m_module_name(std::move(std::get<0>(tup)))
m_force_avx512(std::get<3>(tup)), m_slp_vectorize(std::get<4>(tup)), m_module_name(std::move(std::get<0>(tup)))
{
// Create the module.
m_module = std::make_unique<llvm::Module>(m_module_name, context());
Expand All @@ -595,7 +595,7 @@ llvm_state::llvm_state(const llvm_state &other)
// - creating a new jit,
// - copying over the options from other.
: m_jitter(std::make_unique<jit>()), m_opt_level(other.m_opt_level), m_fast_math(other.m_fast_math),
m_force_avx512(other.m_force_avx512), m_module_name(other.m_module_name)
m_force_avx512(other.m_force_avx512), m_slp_vectorize(other.m_slp_vectorize), m_module_name(other.m_module_name)
{
if (other.is_compiled()) {
// 'other' was compiled.
Expand Down Expand Up @@ -652,6 +652,7 @@ llvm_state &llvm_state::operator=(llvm_state &&other) noexcept
m_bc_snapshot = std::move(other.m_bc_snapshot);
m_fast_math = other.m_fast_math;
m_force_avx512 = other.m_force_avx512;
m_slp_vectorize = other.m_slp_vectorize;
m_module_name = std::move(other.m_module_name);
}

Expand Down Expand Up @@ -687,6 +688,7 @@ void llvm_state::save_impl(Archive &ar, unsigned) const
ar << m_opt_level;
ar << m_fast_math;
ar << m_force_avx512;
ar << m_slp_vectorize;
ar << m_module_name;

// Store the bitcode.
Expand Down Expand Up @@ -757,6 +759,10 @@ void llvm_state::load_impl(Archive &ar, unsigned version)
bool force_avx512{};
ar >> force_avx512;

// NOLINTNEXTLINE(misc-const-correctness)
bool slp_vectorize{};
ar >> slp_vectorize;

// NOLINTNEXTLINE(misc-const-correctness)
std::string module_name;
ar >> module_name;
Expand All @@ -783,6 +789,7 @@ void llvm_state::load_impl(Archive &ar, unsigned version)
m_opt_level = opt_level;
m_fast_math = fast_math;
m_force_avx512 = force_avx512;
m_slp_vectorize = slp_vectorize;
m_module_name = module_name;

// Reset module and builder to the def-cted state.
Expand Down Expand Up @@ -889,6 +896,16 @@ bool llvm_state::force_avx512() const
return m_force_avx512;
}

// Fetch the current value of the SLP vectorization flag.
// When true, the SLP vectoriser is enabled in the optimisation
// pipeline (it feeds PipelineTuningOptions::SLPVectorization /
// PassManagerBuilder::SLPVectorize in llvm_state::optimise()).
bool llvm_state::get_slp_vectorize() const
{
return m_slp_vectorize;
}

// Set the SLP vectorization flag. The new value takes effect the
// next time the module is optimised/compiled: it is read by
// llvm_state::optimise() and it also participates in the in-memory
// cache key computed in llvm_state::compile(), since it affects codegen.
void llvm_state::set_slp_vectorize(bool flag)
{
m_slp_vectorize = flag;
}

unsigned llvm_state::clamp_opt_level(unsigned opt_level)
{
return std::min<unsigned>(opt_level, 3u);
Expand Down Expand Up @@ -1038,7 +1055,7 @@ void llvm_state::optimise()

// Create the new pass manager builder, passing
// the native target machine from the JIT class.
// NOTE: we turn manually on the SLP vectoriser here, which is off
// NOTE: if requested, we turn manually on the SLP vectoriser here, which is off
// by default. Not sure why it is off, the LLVM docs imply this
// is on by default at nonzero optimisation levels for clang and opt.
// NOTE: the reason for this inconsistency is that opt uses PB.parsePassPipeline()
Expand All @@ -1050,7 +1067,7 @@ void llvm_state::optimise()
// switching to this alternative way of setting up the optimisation pipeline
// in the future.
llvm::PipelineTuningOptions pto;
pto.SLPVectorization = true;
pto.SLPVectorization = m_slp_vectorize;
llvm::PassBuilder PB(m_jitter->m_tm.get(), pto);

// Register all the basic analyses with the managers.
Expand Down Expand Up @@ -1108,10 +1125,10 @@ void llvm_state::optimise()
pm_builder.OptLevel = m_opt_level;
// Enable function inlining.
pm_builder.Inliner = llvm::createFunctionInliningPass(m_opt_level, 0, false);
// NOTE: we turn manually on the SLP vectoriser here, which is off
// NOTE: if requested, we turn manually on the SLP vectoriser here, which is off
// by default. Not sure why it is off, the LLVM docs imply this
// is on by default at nonzero optimisation levels for clang and opt.
pm_builder.SLPVectorize = true;
pm_builder.SLPVectorize = m_slp_vectorize;

m_jitter->m_tm->adjustPassManager(pm_builder);

Expand Down Expand Up @@ -1217,10 +1234,11 @@ void llvm_state::compile()
// Fetch the bitcode *before* optimisation.
auto orig_bc = get_bc();

// Combine m_opt_level and m_force_avx512 into a single value,
// as they both affect codegen.
// Combine m_opt_level, m_force_avx512 and m_slp_vectorize into a single value,
// as they all affect codegen.
assert(m_opt_level <= 3u);
const auto olevel = m_opt_level + (static_cast<unsigned>(m_force_avx512) << 2);
const auto olevel = m_opt_level + (static_cast<unsigned>(m_force_avx512) << 2)
+ (static_cast<unsigned>(m_slp_vectorize) << 3);

if (auto cached_data = detail::llvm_state_mem_cache_lookup(orig_bc, olevel)) {
// Cache hit.
Expand Down Expand Up @@ -1361,7 +1379,7 @@ const std::string &llvm_state::module_name() const
llvm_state llvm_state::make_similar() const
{
return llvm_state(kw::mname = m_module_name, kw::opt_level = m_opt_level, kw::fast_math = m_fast_math,
kw::force_avx512 = m_force_avx512);
kw::force_avx512 = m_force_avx512, kw::slp_vectorize = m_slp_vectorize);
}

std::ostream &operator<<(std::ostream &os, const llvm_state &s)
Expand All @@ -1373,6 +1391,7 @@ std::ostream &operator<<(std::ostream &os, const llvm_state &s)
oss << "Compiled : " << s.is_compiled() << '\n';
oss << "Fast math : " << s.m_fast_math << '\n';
oss << "Force AVX512 : " << s.m_force_avx512 << '\n';
oss << "SLP vectorization : " << s.m_slp_vectorize << '\n';
oss << "Optimisation level: " << s.m_opt_level << '\n';
oss << "Data layout : " << s.m_jitter->m_lljit->getDataLayout().getStringRepresentation() << '\n';
oss << "Target triple : " << s.m_jitter->get_target_triple().str() << '\n';
Expand Down
2 changes: 1 addition & 1 deletion test/acos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/acosh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/asin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/asinh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/atan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/atan2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/atanh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/cos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/cosh.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/erf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
2 changes: 1 addition & 1 deletion test/exp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ TEST_CASE("normalise")
// Test to check vectorisation via the vector-function-abi-variant machinery.
TEST_CASE("vfabi")
{
llvm_state s;
llvm_state s{kw::slp_vectorize = true};

auto [a, b] = make_vars("a", "b");

Expand Down
Loading

0 comments on commit 6989f64

Please sign in to comment.