diff --git a/CMakeLists.txt b/CMakeLists.txt index b7ceb87ca..bf7ffdf12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ if(NOT CMAKE_BUILD_TYPE) FORCE) endif() -project(heyoka VERSION 1.0.0 LANGUAGES CXX C) +project(heyoka VERSION 1.1.0 LANGUAGES CXX C) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/yacma") @@ -293,7 +293,7 @@ if(HEYOKA_WITH_SLEEF) endif() # Setup the heyoka ABI version number. -set(HEYOKA_ABI_VERSION 22) +set(HEYOKA_ABI_VERSION 23) if(HEYOKA_BUILD_STATIC_LIBRARY) # Setup of the heyoka static library. diff --git a/benchmark/large_cfunc.cpp b/benchmark/large_cfunc.cpp index c70c9f51d..cc9fdb069 100644 --- a/benchmark/large_cfunc.cpp +++ b/benchmark/large_cfunc.cpp @@ -56,7 +56,6 @@ int main() add_cfunc(s, "en", {en}, kw::vars = {x, y, z, vx, vy, vz}, kw::compact_mode = true); - s.optimise(); s.compile(); [[maybe_unused]] auto fn = s.jit_lookup("en"); diff --git a/benchmark/penc_comparison.cpp b/benchmark/penc_comparison.cpp index e1d2dcb94..f602b23e6 100644 --- a/benchmark/penc_comparison.cpp +++ b/benchmark/penc_comparison.cpp @@ -133,9 +133,6 @@ void run_benchmark(unsigned order) // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); diff --git a/doc/changelog.rst b/doc/changelog.rst index e44b04d43..ceb634266 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,6 +1,38 @@ Changelog ========= +1.1.0 (unreleased) +------------------ + +New +~~~ + +- It is now possible to get the LLVM bitcode of + an ``llvm_state`` + (`#339 `__). + +Changes +~~~~~~~ + +- The LLVM bitcode is now used internally (instead of the textual + representation of the IR) when copying and serialising + an ``llvm_state`` + (`#339 `__). +- The optimisation pass in an ``llvm_state`` is now automatically + called during compilation + (`#339 `__). + +Fix +~~~ + +- Fix the object file of an ``llvm_state`` not being + preserved during copy and deserialisation + (`#339 `__). +- Fix LLVM module name not being preserved during + copy and deserialisation of ``llvm_state`` + (`#339 `__). +- Fix broken link in the docs. + 1.0.0 (2023-08-10) ------------------ diff --git a/include/heyoka/llvm_state.hpp b/include/heyoka/llvm_state.hpp index 2e6cb0554..fd05287d9 100644 --- a/include/heyoka/llvm_state.hpp +++ b/include/heyoka/llvm_state.hpp @@ -133,6 +133,7 @@ class HEYOKA_DLL_PUBLIC llvm_state std::unique_ptr m_builder; unsigned m_opt_level; std::string m_ir_snapshot; + std::string m_bc_snapshot; bool m_fast_math; bool m_force_avx512; std::string m_module_name; @@ -210,19 +211,20 @@ class HEYOKA_DLL_PUBLIC llvm_state // end of a constructor. HEYOKA_DLL_LOCAL void ctor_setup_math_flags(); -public: - llvm_state(); - // NOTE: enable the kwargs ctor only if: + // Meta-programming for the kwargs ctor. Enabled if: // - there is at least 1 argument (i.e., cannot act as a def ctor), // - if there is only 1 argument, it cannot be of type llvm_state // (so that it does not interfere with copy/move ctors). 
- template 0u) - && (sizeof...(KwArgs) > 1u - || std::conjunction_v, llvm_state>>...>), - int> - = 0> + template + using kwargs_ctor_enabler = std::enable_if_t< + (sizeof...(KwArgs) > 0u) + && (sizeof...(KwArgs) > 1u + || std::conjunction_v, llvm_state>>...>), + int>; + +public: + llvm_state(); + template = 0> explicit llvm_state(KwArgs &&...kw_args) : llvm_state(kw_args_ctor_impl(std::forward(kw_args)...)) { } @@ -246,6 +248,7 @@ class HEYOKA_DLL_PUBLIC llvm_state [[nodiscard]] bool force_avx512() const; [[nodiscard]] std::string get_ir() const; + [[nodiscard]] std::string get_bc() const; void dump_object_code(const std::string &) const; [[nodiscard]] const std::string &get_object_code() const; @@ -255,6 +258,7 @@ class HEYOKA_DLL_PUBLIC llvm_state void optimise(); [[nodiscard]] bool is_compiled() const; + [[nodiscard]] bool has_object_code() const; void compile(); @@ -265,9 +269,10 @@ class HEYOKA_DLL_PUBLIC llvm_state HEYOKA_END_NAMESPACE -// Current archive version is 2. Changelog: +// Archive version changelog: // - version 1: got rid of the inline_functions setting; -// - version 2: added the force_avx512 setting. -BOOST_CLASS_VERSION(heyoka::llvm_state, 2) +// - version 2: added the force_avx512 setting; +// - version 3: added the bitcode snapshot. +BOOST_CLASS_VERSION(heyoka::llvm_state, 3) #endif diff --git a/include/heyoka/taylor.hpp b/include/heyoka/taylor.hpp index 3602e51c7..5aeca1825 100644 --- a/include/heyoka/taylor.hpp +++ b/include/heyoka/taylor.hpp @@ -134,7 +134,7 @@ taylor_c_diff_func_name_args(llvm::LLVMContext &, llvm::Type *, const std::strin // Add a function for computing the dense output // via polynomial evaluation. void taylor_add_d_out_function(llvm_state &, llvm::Type *, std::uint32_t, std::uint32_t, std::uint32_t, bool, - bool = true, bool = true); + bool = true); } // namespace detail diff --git a/src/detail/event_detection.cpp b/src/detail/event_detection.cpp index d1eb1de47..c6af77b14 100644 --- a/src/detail/event_detection.cpp +++ b/src/detail/event_detection.cpp @@ -911,9 +911,6 @@ taylor_adaptive::ed_data::ed_data(llvm_state s, std::vector tes, s // Add the function for the fast exclusion check. detail::llvm_add_fex_check(m_state, fp_t, order, 1); - // Run the optimisation pass. - m_state.optimise(); - // Compile. m_state.compile(); @@ -1548,9 +1545,6 @@ taylor_adaptive_batch::ed_data::ed_data(llvm_state s, std::vector // NOTE: the fast exclusion check is vectorised. detail::llvm_add_fex_check(m_state, fp_t, order, batch_size); - // Run the optimisation pass. - m_state.optimise(); - // Compile. m_state.compile(); diff --git a/src/detail/llvm_helpers.cpp b/src/detail/llvm_helpers.cpp index 799882260..27d08bdd0 100644 --- a/src/detail/llvm_helpers.cpp +++ b/src/detail/llvm_helpers.cpp @@ -939,7 +939,7 @@ std::string llvm_type_name(llvm::Type *t) t->print(ostr, false, true); - return ostr.str(); + return std::move(ostr.str()); } // This function will return true if: diff --git a/src/expression_cfunc.cpp b/src/expression_cfunc.cpp index 34b6798b9..f825a7431 100644 --- a/src/expression_cfunc.cpp +++ b/src/expression_cfunc.cpp @@ -1722,9 +1722,6 @@ auto add_cfunc_impl(llvm_state &s, const std::string &name, const F &fn, std::ui // Restore the original insertion block. builder.SetInsertPoint(orig_bb); - // Run the optimisation pass. 
- s.optimise(); - return dc; } diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index d8f03125f..0f68c1892 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -40,6 +41,8 @@ #include #include #include +#include +#include #include #include #include @@ -59,11 +62,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -310,6 +311,9 @@ std::uint32_t recommended_simd_size() // Implementation of the jit class. struct llvm_state::jit { + // NOTE: this is the llvm_state containing + // the jit instance. + const llvm_state *m_state = nullptr; std::unique_ptr m_lljit; std::unique_ptr m_tm; std::unique_ptr m_ctx; @@ -318,7 +322,7 @@ struct llvm_state::jit { #endif std::optional m_object_file; - jit() + explicit jit(const llvm_state *state) : m_state(state) { // Ensure the native target is inited. detail::init_native_target(); @@ -353,10 +357,18 @@ struct llvm_state::jit { // when it is lazily generated. m_lljit->getObjTransformLayer().setTransform([this](std::unique_ptr obj_buffer) { assert(obj_buffer); - assert(!m_object_file); - // Copy obj_buffer to the local m_object_file member. - m_object_file.emplace(obj_buffer->getBufferStart(), obj_buffer->getBufferEnd()); + // NOTE: this callback will be invoked the first time a jit lookup is performed, + // even if the object code was manually injected via llvm_state_add_obj_to_jit() + // (e.g., during copy, des11n, etc.). In such a case, m_object_file has already been set up properly and we + // just sanity check in debug mode that the content of m_object_file matches the content of obj_buffer. + if (m_object_file) { + assert(obj_buffer->getBufferSize() == m_object_file->size()); + assert(std::equal(obj_buffer->getBufferStart(), obj_buffer->getBufferEnd(), m_object_file->begin())); + } else { + // Copy obj_buffer to the local m_object_file member. + m_object_file.emplace(obj_buffer->getBufferStart(), obj_buffer->getBufferEnd()); + } return llvm::Expected>(std::move(obj_buffer)); }); @@ -414,6 +426,7 @@ struct llvm_state::jit { #endif } + jit() = delete; jit(const jit &) = delete; jit(jit &&) = delete; jit &operator=(const jit &) = delete; @@ -515,30 +528,46 @@ void llvm_state_add_obj_to_jit(Jit &j, const std::string &obj) "The function for adding a compiled module to the jit failed. The full error message:\n{}", ostr.str())); } // LCOV_EXCL_STOP + + // Add the object code also to the + // m_object_file member. + // NOTE: this function at the moment is used when m_object_file + // is supposed to be empty. + assert(!j.m_object_file); + j.m_object_file.emplace(obj); } -// Helper to create an LLVM module from a IR in string representation. -auto llvm_state_ir_to_module(const std::string &ir, llvm::LLVMContext &ctx) +// Helper to create an LLVM module from bitcode. +// NOTE: the module name needs to be passed explicitly (although it is already +// contained in the bitcode) because apparently llvm::parseBitcodeFile() discards the module +// name when parsing. +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +auto llvm_state_bc_to_module(const std::string &module_name, const std::string &bc, llvm::LLVMContext &ctx) { // Create the corresponding memory buffer. - auto mb = llvm::MemoryBuffer::getMemBuffer(ir); + auto mb = llvm::MemoryBuffer::getMemBuffer(bc); + assert(mb); - // Construct a new module from the parsed IR. - llvm::SMDiagnostic err; - auto ret = llvm::parseIR(*mb, err, ctx); + // Parse the bitcode. 
+ auto ret = llvm::parseBitcodeFile(mb->getMemBufferRef(), ctx); // LCOV_EXCL_START if (!ret) { + const auto err = ret.takeError(); std::string err_report; llvm::raw_string_ostream ostr(err_report); - err.print("", ostr); + ostr << err; - throw std::invalid_argument(fmt::format("IR parsing failed. The full error message:\n{}", ostr.str())); + throw std::invalid_argument( + fmt::format("LLVM bitcode parsing failed. The full error message:\n{}", ostr.str())); } // LCOV_EXCL_STOP - return ret; + // Set the module name. + ret.get()->setModuleIdentifier(module_name); + + return std::move(ret.get()); } } // namespace @@ -546,7 +575,7 @@ auto llvm_state_ir_to_module(const std::string &ir, llvm::LLVMContext &ctx) } // namespace detail llvm_state::llvm_state(std::tuple &&tup) - : m_jitter(std::make_unique()), m_opt_level(std::get<1>(tup)), m_fast_math(std::get<2>(tup)), + : m_jitter(std::make_unique(this)), m_opt_level(std::get<1>(tup)), m_fast_math(std::get<2>(tup)), m_force_avx512(std::get<3>(tup)), m_module_name(std::move(std::get<0>(tup))) { // Create the module. @@ -570,31 +599,37 @@ llvm_state::llvm_state(const llvm_state &other) // NOTE: start off by: // - creating a new jit, // - copying over the options from other. - : m_jitter(std::make_unique()), m_opt_level(other.m_opt_level), m_fast_math(other.m_fast_math), + : m_jitter(std::make_unique(this)), m_opt_level(other.m_opt_level), m_fast_math(other.m_fast_math), m_force_avx512(other.m_force_avx512), m_module_name(other.m_module_name) { if (other.is_compiled() && other.m_jitter->m_object_file) { // 'other' was compiled and code was generated. // We leave module and builder empty, copy over the - // IR snapshot and add the cached compiled module + // IR/bitcode snapshots and add the cached compiled module // to the jit. m_ir_snapshot = other.m_ir_snapshot; + m_bc_snapshot = other.m_bc_snapshot; + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) detail::llvm_state_add_obj_to_jit(*m_jitter, *other.m_jitter->m_object_file); } else { // 'other' has not been compiled yet, or // it has been compiled but no code has been // lazily generated yet. - // We will fetch its IR and reconstruct + // We will fetch its bitcode and reconstruct // module and builder. - // Get the IR of other. - // NOTE: this works regardless of the compiled - // status of other. - const auto other_ir = other.get_ir(); + // Is other compiled? + const auto other_cmp = other.is_compiled(); - // Create the module from the IR. - m_module = detail::llvm_state_ir_to_module(other_ir, context()); + // Create the module from the bitcode. + // NOTE: branch to avoid expensive copy if other + // has been compiled. + if (other_cmp) { + m_module = detail::llvm_state_bc_to_module(m_module_name, other.m_bc_snapshot, context()); + } else { + m_module = detail::llvm_state_bc_to_module(m_module_name, other.get_bc(), context()); + } // Create a new builder for the module. m_builder = std::make_unique(context()); @@ -603,15 +638,43 @@ llvm_state::llvm_state(const llvm_state &other) ctor_setup_math_flags(); // Compile if needed. - // NOTE: compilation will take care of setting up m_ir_snapshot. - // If no compilation happens, m_ir_snapshot is left empty after init. - if (other.is_compiled()) { + // NOTE: compilation will take care of setting up m_ir_snapshot/m_bc_snapshot. + // If no compilation happens, m_ir_snapshot/m_bc_snapshot are left empty after init. + if (other_cmp) { + // NOTE: we need to temporarily disable optimisations + // before compilation, for the following reason. 
+ // + // Recall that here we are in the case + // in which the 'other' llvm_state has been compiled, but + // no object code has been produced yet. This means the IR + // has already been optimised, and by running another optimisation + // pass now (indirectly, via compile()) we might end + // up modifying the already-optimised IR. + // By temporarily setting m_opt_level to zero, we are preventing + // any modification to the IR and ensuring that, after copying, + // we have exactly reproduced the original llvm_state object. + const auto orig_opt_level = m_opt_level; + m_opt_level = 0; + compile(); + + // Restore the original optimisation level. + m_opt_level = orig_opt_level; } } } -llvm_state::llvm_state(llvm_state &&) noexcept = default; +// NOTE: this needs to be implemented manually as we need +// to set up correctly the m_state member of the jit instance. +llvm_state::llvm_state(llvm_state &&other) noexcept + : m_jitter(std::move(other.m_jitter)), m_module(std::move(other.m_module)), m_builder(std::move(other.m_builder)), + m_opt_level(other.m_opt_level), m_ir_snapshot(std::move(other.m_ir_snapshot)), + m_bc_snapshot(std::move(other.m_bc_snapshot)), m_fast_math(other.m_fast_math), + m_force_avx512(other.m_force_avx512), m_module_name(std::move(other.m_module_name)) +{ + // Set up m_state. + m_jitter->m_state = this; +} llvm_state &llvm_state::operator=(const llvm_state &other) { @@ -626,6 +689,7 @@ llvm_state &llvm_state::operator=(const llvm_state &other) // needs to be done in a different order (specifically, we need to // ensure that the LLVM objects in this are destroyed in a specific // order). +// NOTE: we also need to set up correctly the m_state member of the jit instance. llvm_state &llvm_state::operator=(llvm_state &&other) noexcept { if (this != &other) { @@ -634,9 +698,13 @@ llvm_state &llvm_state::operator=(llvm_state &&other) noexcept m_module = std::move(other.m_module); m_jitter = std::move(other.m_jitter); + // Set up m_state. + m_jitter->m_state = this; + // The remaining bits. m_opt_level = other.m_opt_level; m_ir_snapshot = std::move(other.m_ir_snapshot); + m_bc_snapshot = std::move(other.m_bc_snapshot); m_fast_math = other.m_fast_math; m_force_avx512 = other.m_force_avx512; m_module_name = std::move(other.m_module_name); @@ -645,12 +713,15 @@ llvm_state &llvm_state::operator=(llvm_state &&other) noexcept return *this; } -llvm_state::~llvm_state() = default; +llvm_state::~llvm_state() +{ + // NOTE: if this has not been moved-from, ensure + // the m_state member of the jit is pointing to this. + if (m_jitter) { + assert(m_jitter->m_state == this); + } +} -// NOTE: the save/load logic is essentially the same as in the -// copy constructor. Specifically, we have 2 different paths -// depending on whether the state is compiled AND object -// code was generated. template void llvm_state::save_impl(Archive &ar, unsigned) const { @@ -671,14 +742,14 @@ void llvm_state::save_impl(Archive &ar, unsigned) const ar << m_force_avx512; ar << m_module_name; - // Store the IR. - // NOTE: avoid get_ir() if the module has been compiled, + // Store the bitcode. + // NOTE: avoid get_bc() if the module has been compiled, // and use the snapshot directly, so that we don't make // a useless copy. 
if (cmp) { - ar << m_ir_snapshot; + ar << m_bc_snapshot; } else { - ar << get_ir(); + ar << get_bc(); } if (with_obj) { @@ -686,6 +757,15 @@ void llvm_state::save_impl(Archive &ar, unsigned) const // NOLINTNEXTLINE(bugprone-unchecked-optional-access) ar << *m_jitter->m_object_file; } + + // Save a copy of the IR snapshot if the state + // is compiled and binary code was emitted. + // NOTE: we want this because otherwise we would + // need to re-parse the bitcode during des11n to + // restore the IR snapshot. + if (cmp && with_obj) { + ar << m_ir_snapshot; + } } template @@ -735,9 +815,9 @@ void llvm_state::load_impl(Archive &ar, unsigned version) std::string module_name; ar >> module_name; - // Load the ir - std::string ir; - ar >> ir; + // Load the bitcode. + std::string bc_snapshot; + ar >> bc_snapshot; // Recover the object file, if available. std::optional obj_file; @@ -746,6 +826,12 @@ void llvm_state::load_impl(Archive &ar, unsigned version) ar >> *obj_file; } + // Recover the IR snapshot, if available. + std::string ir_snapshot; + if (cmp && with_obj) { + ar >> ir_snapshot; + } + try { // Set the config options. m_opt_level = opt_level; @@ -758,23 +844,25 @@ void llvm_state::load_impl(Archive &ar, unsigned version) m_builder.reset(); // Reset the jit with a new one. - m_jitter = std::make_unique(); + m_jitter = std::make_unique(this); if (cmp && with_obj) { - // Assign the ir snapshot. - m_ir_snapshot = std::move(ir); + // Assign the snapshots. + m_ir_snapshot = std::move(ir_snapshot); + m_bc_snapshot = std::move(bc_snapshot); // Add the object code to the jit. // NOLINTNEXTLINE(bugprone-unchecked-optional-access) detail::llvm_state_add_obj_to_jit(*m_jitter, *obj_file); } else { - // Clear the existing ir snapshot - // (it will be replaced with the - // actual ir if compilation is needed). + // Clear the existing snapshots + // (they will be replaced with the + // actual ir/bitcode if compilation is needed). m_ir_snapshot.clear(); + m_bc_snapshot.clear(); - // Create the module from the IR. - m_module = detail::llvm_state_ir_to_module(ir, context()); + // Create the module from the bitcode. + m_module = detail::llvm_state_bc_to_module(m_module_name, bc_snapshot, context()); // Create a new builder for the module. m_builder = std::make_unique(context()); @@ -783,11 +871,29 @@ void llvm_state::load_impl(Archive &ar, unsigned version) ctor_setup_math_flags(); // Compile if needed. - // NOTE: compilation will take care of setting up m_ir_snapshot. - // If no compilation happens, m_ir_snapshot is left empty after + // NOTE: compilation will take care of setting up m_ir_snapshot/m_bc_snapshot. + // If no compilation happens, m_ir_snapshot/m_bc_snapshot are left empty after // clearing earlier. if (cmp) { + // NOTE: we need to temporarily disable optimisations + // before compilation, for the following reason. + // + // Recall that here we are in the case + // in which the serialised llvm_state had been compiled, but + // no object code had been produced yet. This means the IR + // had already been optimised, and by running another optimisation + // pass (indirectly, via compile()) now we might end + // up modifying the already-optimised IR. + // By temporarily setting m_opt_level to zero, we are preventing + // any modification to the IR and ensuring that, after deserialisation, + // we have exactly reproduced the original llvm_state object. + const auto orig_opt_level = m_opt_level; + m_opt_level = 0; + compile(); + + // Restore the original optimisation level. 
+ m_opt_level = orig_opt_level; } } // LCOV_EXCL_START @@ -1133,6 +1239,8 @@ void llvm_state::optimise() } } +// NOTE: we need to emphasise in the docs that compilation +// triggers an optimisation pass. void llvm_state::compile() { check_uncompiled(__func__); @@ -1149,8 +1257,12 @@ void llvm_state::compile() } try { - // Store a snapshot of the IR before compiling. + // Run the optimisation pass. + optimise(); + + // Store a snapshot of the optimised IR and bitcode before compiling. m_ir_snapshot = get_ir(); + m_bc_snapshot = get_bc(); // Add the module (this will clear out m_module). m_jitter->add_module(std::move(m_module)); @@ -1173,6 +1285,11 @@ bool llvm_state::is_compiled() const return !m_module; } +bool llvm_state::has_object_code() const +{ + return static_cast(m_jitter->m_object_file); +} + // NOTE: this function will lookup symbol names, // so it does not necessarily return a function // pointer (could be, e.g., a global variable). @@ -1199,8 +1316,10 @@ std::string llvm_state::get_ir() const // get the IR from it. std::string out; llvm::raw_string_ostream ostr(out); + m_module->print(ostr, nullptr); - return ostr.str(); + + return std::move(ostr.str()); } else { // The module has been compiled. // Return the IR snapshot that @@ -1209,6 +1328,25 @@ std::string llvm_state::get_ir() const } } +std::string llvm_state::get_bc() const +{ + if (m_module) { + // The module has not been compiled yet, + // get the bitcode from it. + std::string out; + llvm::raw_string_ostream ostr(out); + + llvm::WriteBitcodeToFile(*m_module, ostr); + + return std::move(ostr.str()); + } else { + // The module has been compiled. + // Return the bitcode snapshot that + // was created before the compilation. + return m_bc_snapshot; + } +} + // LCOV_EXCL_START void llvm_state::dump_object_code(const std::string &filename) const @@ -1261,15 +1399,23 @@ std::ostream &operator<<(std::ostream &os, const llvm_state &s) std::ostringstream oss; oss << std::boolalpha; - oss << "Module name : " << s.m_module_name << '\n'; - oss << "Compiled : " << s.is_compiled() << '\n'; - oss << "Fast math : " << s.m_fast_math << '\n'; - oss << "Force AVX512 : " << s.m_force_avx512 << '\n'; - oss << "Optimisation level : " << s.m_opt_level << '\n'; - oss << "Target triple : " << s.m_jitter->get_target_triple().str() << '\n'; - oss << "Target CPU : " << s.m_jitter->get_target_cpu() << '\n'; - oss << "Target features : " << s.m_jitter->get_target_features() << '\n'; - oss << "IR size : " << s.get_ir().size() << '\n'; + oss << "Module name : " << s.m_module_name << '\n'; + oss << "Compiled : " << s.is_compiled() << '\n'; + oss << "Has object code : " << s.has_object_code() << '\n'; + oss << "Fast math : " << s.m_fast_math << '\n'; + oss << "Force AVX512 : " << s.m_force_avx512 << '\n'; + oss << "Optimisation level: " << s.m_opt_level << '\n'; + oss << "Data layout : " << s.m_jitter->m_lljit->getDataLayout().getStringRepresentation() << '\n'; + oss << "Target triple : " << s.m_jitter->get_target_triple().str() << '\n'; + oss << "Target CPU : " << s.m_jitter->get_target_cpu() << '\n'; + oss << "Target features : " << s.m_jitter->get_target_features() << '\n'; + oss << "Bitcode size : "; + + if (s.is_compiled()) { + oss << s.m_bc_snapshot.size() << '\n'; + } else { + oss << s.get_bc().size() << '\n'; + } return os << oss.str(); } diff --git a/src/taylor_00.cpp b/src/taylor_00.cpp index adc849d07..d5a92dc17 100644 --- a/src/taylor_00.cpp +++ b/src/taylor_00.cpp @@ -105,29 +105,6 @@ std::uint32_t n_pars_in_dc(const taylor_dc_t &dc) 
namespace { -// RAII helper to temporarily set the opt level to 0 in an llvm_state. -struct opt_disabler { - llvm_state *m_s; - unsigned m_orig_opt_level; - - explicit opt_disabler(llvm_state &s) : m_s(&s), m_orig_opt_level(s.opt_level()) - { - // Disable optimisations. - m_s->opt_level() = 0; - } - - opt_disabler(const opt_disabler &) = delete; - opt_disabler(opt_disabler &&) noexcept = delete; - opt_disabler &operator=(const opt_disabler &) = delete; - opt_disabler &operator=(opt_disabler &&) noexcept = delete; - - ~opt_disabler() - { - // Restore the original optimisation level. - m_s->opt_level() = m_orig_opt_level; - } -}; - // Helper to determine the optimal Taylor order for a given tolerance, // following Jorba's prescription. // NOTE: when T is mppp::real and tol has a low precision, the use @@ -293,11 +270,6 @@ auto taylor_add_adaptive_step_with_events(llvm_state &s, const std::string &name // Verify the function. s.verify_function(f); - // Run the optimisation pass. - // NOTE: this does nothing currently, as the optimisation - // level is set to zero from the outside. - s.optimise(); - return std::tuple{std::move(dc), order}; } @@ -468,11 +440,6 @@ auto taylor_add_adaptive_step(llvm_state &s, const std::string &name, const U &s // Verify the function. s.verify_function(f); - // Run the optimisation pass. - // NOTE: this does nothing currently, as the optimisation - // level is set to zero from the outside. - s.optimise(); - return std::tuple{std::move(dc), order}; } @@ -714,11 +681,6 @@ void taylor_adaptive::finalise_ctor_impl(const U &sys, std::vector state, // Do we have events? const auto with_events = !tes.empty() || !ntes.empty(); - // Temporarily disable optimisations in s, so that - // we don't optimise twice when adding the step - // and then the d_out. - std::optional od(m_llvm); - // Add the stepper function. if (with_events) { std::vector ee; @@ -770,15 +732,6 @@ void taylor_adaptive::finalise_ctor_impl(const U &sys, std::vector state, detail::get_logger()->trace("Taylor dense output runtime: {}", sw); sw.reset(); - // Restore the original optimisation level in s. - od.reset(); - - // Run the optimisation pass manually. - m_llvm.optimise(); - - detail::get_logger()->trace("Taylor global opt pass runtime: {}", sw); - sw.reset(); - // Run the jit. m_llvm.compile(); @@ -2397,11 +2350,6 @@ void taylor_adaptive_batch::finalise_ctor_impl(const U &sys, std::vector s // Do we have events? const auto with_events = !tes.empty() || !ntes.empty(); - // Temporarily disable optimisations in s, so that - // we don't optimise twice when adding the step - // and then the d_out. - std::optional od(m_llvm); - // Add the stepper function. if (with_events) { std::vector ee; @@ -2450,15 +2398,6 @@ void taylor_adaptive_batch::finalise_ctor_impl(const U &sys, std::vector s detail::get_logger()->trace("Taylor batch dense output runtime: {}", sw); sw.reset(); - // Restore the original optimisation level in s. - od.reset(); - - // Run the optimisation pass manually. - m_llvm.optimise(); - - detail::get_logger()->trace("Taylor batch global opt pass runtime: {}", sw); - sw.reset(); - // Run the jit. m_llvm.compile(); diff --git a/src/taylor_01.cpp b/src/taylor_01.cpp index 984c06168..f3c4590f9 100644 --- a/src/taylor_01.cpp +++ b/src/taylor_01.cpp @@ -1733,7 +1733,7 @@ template class t_event_impl; // Add a function for computing the dense output // via polynomial evaluation. 
void taylor_add_d_out_function(llvm_state &s, llvm::Type *fp_scal_t, std::uint32_t n_eq, std::uint32_t order, - std::uint32_t batch_size, bool high_accuracy, bool external_linkage, bool optimise) + std::uint32_t batch_size, bool high_accuracy, bool external_linkage) { // LCOV_EXCL_START assert(n_eq > 0u); @@ -1747,6 +1747,9 @@ void taylor_add_d_out_function(llvm_state &s, llvm::Type *fp_scal_t, std::uint32 // Fetch the external type corresponding to fp_scal_t. auto *ext_fp_scal_t = llvm_ext_type(fp_scal_t); + // Fetch the current insertion block. + auto *orig_bb = builder.GetInsertBlock(); + // The function arguments: // - the output pointer (read/write, used also for accumulation), // - the pointer to the Taylor coefficients (read-only), @@ -1905,10 +1908,8 @@ void taylor_add_d_out_function(llvm_state &s, llvm::Type *fp_scal_t, std::uint32 // Verify the function. s.verify_function(f); - // Run the optimisation pass, if requested. - if (optimise) { - s.optimise(); - } + // Restore the original insertion block. + builder.SetInsertPoint(orig_bb); } } // namespace detail @@ -1960,7 +1961,7 @@ void continuous_output::add_c_out_function(std::uint32_t order, std::uint32_t // Add the function for the computation of the dense output. // NOTE: the dense output function operates on data in external format. - detail::taylor_add_d_out_function(m_llvm_state, fp_t, dim, order, 1, high_accuracy, false, false); + detail::taylor_add_d_out_function(m_llvm_state, fp_t, dim, order, 1, high_accuracy, false); // Fetch it. auto *d_out_f = md.getFunction("d_out_f"); @@ -2152,9 +2153,6 @@ void continuous_output::add_c_out_function(std::uint32_t order, std::uint32_t // Verify the function. m_llvm_state.verify_function(f); - // Run the optimisation pass. - m_llvm_state.optimise(); - // Compile. m_llvm_state.compile(); @@ -2835,9 +2833,6 @@ void continuous_output_batch::add_c_out_function(std::uint32_t order, std::ui // Verify the function. m_llvm_state.verify_function(f); - // Run the optimisation pass. - m_llvm_state.optimise(); - // Compile. m_llvm_state.compile(); diff --git a/src/taylor_02.cpp b/src/taylor_02.cpp index 7b3c85733..2bb8231c4 100644 --- a/src/taylor_02.cpp +++ b/src/taylor_02.cpp @@ -2023,9 +2023,6 @@ auto taylor_add_jet_impl(llvm_state &s, const std::string &name, const U &sys, s // Restore the original insertion block. builder.SetInsertPoint(orig_bb); - // Run the optimisation pass. - s.optimise(); - return dc; } diff --git a/test/event_detection.cpp b/test/event_detection.cpp index a2adf93ac..c062aaadd 100644 --- a/test/event_detection.cpp +++ b/test/event_detection.cpp @@ -110,7 +110,6 @@ TEST_CASE("fast exclusion check") // Add the function and fetch it. 
detail::llvm_add_fex_check(s, detail::to_llvm_type(s.context()), order, batch_size, use_cs); - s.optimise(); s.compile(); auto fex_check = reinterpret_cast(s.jit_lookup("fex_check")); diff --git a/test/event_detection_mp.cpp b/test/event_detection_mp.cpp index f82bd7ea5..c5eece9cc 100644 --- a/test/event_detection_mp.cpp +++ b/test/event_detection_mp.cpp @@ -67,8 +67,6 @@ TEST_CASE("poly translator 1") detail::add_poly_translator_1(s, detail::llvm_type_like(s, input[0]), 5, 1); - s.optimise(); - s.compile(); auto *pt1 = reinterpret_cast(s.jit_lookup("poly_translate_1")); @@ -100,8 +98,6 @@ TEST_CASE("poly csc") detail::llvm_add_csc(s, detail::llvm_type_like(s, input[0]), 5, 1); - s.optimise(); - s.compile(); auto *pt1 = reinterpret_cast( diff --git a/test/expression_diff_tensors.cpp b/test/expression_diff_tensors.cpp index aa8b26881..eb4be7f6f 100644 --- a/test/expression_diff_tensors.cpp +++ b/test/expression_diff_tensors.cpp @@ -415,7 +415,6 @@ TEST_CASE("fixed centres check") llvm_state s; add_cfunc(s, "diff", diff_vec, kw::vars = vars); - s.optimise(); s.compile(); auto *fr = reinterpret_cast( @@ -462,7 +461,6 @@ TEST_CASE("fixed centres check") // Compile and fetch the expression of the derivative. llvm_state s2; add_cfunc(s2, "diff", {ex}, kw::vars = vars); - s2.optimise(); s2.compile(); auto *fr2 = reinterpret_cast( @@ -514,7 +512,6 @@ TEST_CASE("speelpenning check") llvm_state s; add_cfunc(s, "diff", diff_vec, kw::vars = vars); - s.optimise(); s.compile(); auto *fr = reinterpret_cast( @@ -556,7 +553,6 @@ TEST_CASE("speelpenning check") // Compile and fetch the expression of the derivative. llvm_state s2; add_cfunc(s2, "diff", {ex}, kw::vars = vars); - s2.optimise(); s2.compile(); auto *fr2 = reinterpret_cast( @@ -629,7 +625,6 @@ TEST_CASE("speelpenning complexity") REQUIRE(std::get(dc_reverse[i].value()).extract()->args().size() == 2u); } - s.optimise(); s.compile(); } } diff --git a/test/llvm_helpers.cpp b/test/llvm_helpers.cpp index 193a51de9..226ebbf93 100644 --- a/test/llvm_helpers.cpp +++ b/test/llvm_helpers.cpp @@ -99,9 +99,6 @@ TEST_CASE("sgn scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -153,9 +150,6 @@ TEST_CASE("sgn scalar mp") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -225,9 +219,6 @@ TEST_CASE("sgn batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -296,9 +287,6 @@ TEST_CASE("sincos scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -363,9 +351,6 @@ TEST_CASE("sincos batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -434,9 +419,6 @@ TEST_CASE("sincos mp") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -484,9 +466,6 @@ TEST_CASE("modulus scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -549,9 +528,6 @@ TEST_CASE("modulus batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -597,9 +573,6 @@ TEST_CASE("inv_kep_E_scalar") // Add the function. llvm_add_inv_kep_E_wrapper(s, detail::to_llvm_type(s.context()), 1, "hey_kep"); - // Run the optimisation pass. - s.optimise(); - // Compile. 
s.compile(); @@ -745,9 +718,6 @@ TEST_CASE("inv_kep_E_batch") // Add the function. llvm_add_inv_kep_E_wrapper(s, detail::to_llvm_type(s.context()), batch_size, "hey_kep"); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -986,9 +956,6 @@ TEST_CASE("inv_kep_E_scalar mp") // Add the function. llvm_add_inv_kep_E_wrapper(s, fp_t, 1, "hey_kep"); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1152,9 +1119,6 @@ TEST_CASE("while_loop") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1271,8 +1235,6 @@ TEST_CASE("csc_scalar") llvm_add_csc(s, to_llvm_type(s.context()), degree, 1); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup( @@ -1347,8 +1309,6 @@ TEST_CASE("csc_batch") llvm_add_csc(s, to_llvm_type(s.context()), degree, batch_size); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup( @@ -1520,9 +1480,6 @@ TEST_CASE("minmax") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1667,9 +1624,6 @@ TEST_CASE("fma scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1728,9 +1682,6 @@ TEST_CASE("fma batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1803,9 +1754,6 @@ TEST_CASE("fma scalar mp") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1869,9 +1817,6 @@ TEST_CASE("eft_product scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1954,9 +1899,6 @@ TEST_CASE("eft_product batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2053,9 +1995,6 @@ TEST_CASE("dl mul scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2161,9 +2100,6 @@ TEST_CASE("dl mul batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); // Fetch the function pointer. @@ -2280,9 +2216,6 @@ TEST_CASE("dl div scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2384,9 +2317,6 @@ TEST_CASE("dl div batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); // Fetch the function pointer. @@ -2484,9 +2414,6 @@ TEST_CASE("floor scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2540,9 +2467,6 @@ TEST_CASE("floor batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2611,9 +2535,6 @@ TEST_CASE("dl floor scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2701,9 +2622,6 @@ TEST_CASE("dl floor batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2800,9 +2718,6 @@ TEST_CASE("dl modulus scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2888,9 +2803,6 @@ TEST_CASE("dl modulus batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. 
s.compile(); @@ -2988,8 +2900,6 @@ TEST_CASE("to_size_t") builder.CreateRet(to_size_t(s, in_val)); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3021,8 +2931,6 @@ TEST_CASE("to_size_t") builder.CreateRetVoid(); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3057,8 +2965,6 @@ TEST_CASE("to_size_t") builder.CreateRet(to_size_t(s, in_val)); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3090,8 +2996,6 @@ TEST_CASE("to_size_t") builder.CreateRetVoid(); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3139,8 +3043,6 @@ TEST_CASE("real_ext_load") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3182,8 +3084,6 @@ TEST_CASE("switch") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3216,8 +3116,6 @@ TEST_CASE("switch") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3252,8 +3150,6 @@ TEST_CASE("switch") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); diff --git a/test/llvm_state.cpp b/test/llvm_state.cpp index c3753313b..2e369b675 100644 --- a/test/llvm_state.cpp +++ b/test/llvm_state.cpp @@ -67,6 +67,9 @@ TEST_CASE("empty state") std::cout << s << '\n'; std::cout << s.get_ir() << '\n'; + REQUIRE(!s.get_bc().empty()); + REQUIRE(!s.get_ir().empty()); + // Print also some info on the FP types. std::cout << "Double digits : " << std::numeric_limits::digits << '\n'; std::cout << "Long double digits: " << std::numeric_limits::digits << '\n'; @@ -82,12 +85,16 @@ TEST_CASE("copy semantics") llvm_state s{kw::mname = "sample state", kw::opt_level = 2u, kw::fast_math = true}; + taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); + REQUIRE(s.module_name() == "sample state"); REQUIRE(s.opt_level() == 2u); REQUIRE(s.fast_math()); REQUIRE(!s.is_compiled()); + REQUIRE(!s.has_object_code()); - taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); + const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); auto s2 = s; @@ -95,6 +102,10 @@ TEST_CASE("copy semantics") REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(!s2.is_compiled()); + REQUIRE(!s2.has_object_code()); + + REQUIRE(s2.get_ir() == orig_ir); + REQUIRE(s2.get_bc() == orig_bc); s2.compile(); @@ -116,14 +127,32 @@ TEST_CASE("copy semantics") taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); + // On-the-fly testing for string repr. 
+ std::ostringstream oss; + oss << s; + const auto orig_repr = oss.str(); + s.compile(); + oss.str(""); + oss << s; + const auto compiled_repr = oss.str(); + + REQUIRE(orig_repr != compiled_repr); + + const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); + auto s2 = s; REQUIRE(s2.module_name() == "sample state"); REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(s2.is_compiled()); + REQUIRE(!s2.has_object_code()); + + REQUIRE(s2.get_ir() == orig_ir); + REQUIRE(s2.get_bc() == orig_bc); auto jptr = reinterpret_cast(s2.jit_lookup("jet")); @@ -147,12 +176,19 @@ TEST_CASE("copy semantics") auto jptr = reinterpret_cast(s.jit_lookup("jet")); + const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); + auto s2 = s; REQUIRE(s2.module_name() == "sample state"); REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(s2.is_compiled()); + REQUIRE(s2.has_object_code()); + + REQUIRE(s2.get_ir() == orig_ir); + REQUIRE(s2.get_bc() == orig_bc); jptr = reinterpret_cast(s2.jit_lookup("jet")); @@ -200,9 +236,10 @@ TEST_CASE("s11n") { std::stringstream ss; - llvm_state s; + llvm_state s{kw::mname = "foo"}; const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); { boost::archive::binary_oarchive oa(ss); @@ -219,8 +256,10 @@ TEST_CASE("s11n") } REQUIRE(!s.is_compiled()); + REQUIRE(!s.has_object_code()); REQUIRE(s.get_ir() == orig_ir); - REQUIRE(s.module_name() == ""); + REQUIRE(s.get_bc() == orig_bc); + REQUIRE(s.module_name() == "foo"); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); REQUIRE(s.force_avx512() == false); @@ -230,14 +269,15 @@ TEST_CASE("s11n") { std::stringstream ss; - llvm_state s{kw::force_avx512 = true}; + llvm_state s{kw::force_avx512 = true, kw::mname = "foo"}; taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); - const auto orig_ir = s.get_ir(); - s.compile(); + const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); + { boost::archive::binary_oarchive oa(ss); @@ -253,8 +293,10 @@ TEST_CASE("s11n") } REQUIRE(s.is_compiled()); + REQUIRE(!s.has_object_code()); + REQUIRE(s.module_name() == "foo"); REQUIRE(s.get_ir() == orig_ir); - REQUIRE(s.module_name() == ""); + REQUIRE(s.get_bc() == orig_bc); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); REQUIRE(s.force_avx512() == true); @@ -264,14 +306,15 @@ TEST_CASE("s11n") { std::stringstream ss; - llvm_state s; + llvm_state s{kw::mname = "foo"}; taylor_add_jet(s, "jet", {-1_dbl, x + y}, 1, 1, true, false); - const auto orig_ir = s.get_ir(); - s.compile(); + const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); + s.jit_lookup("jet"); { @@ -289,8 +332,10 @@ TEST_CASE("s11n") } REQUIRE(s.is_compiled()); + REQUIRE(s.has_object_code()); REQUIRE(s.get_ir() == orig_ir); - REQUIRE(s.module_name() == ""); + REQUIRE(s.get_bc() == orig_bc); + REQUIRE(s.module_name() == "foo"); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); diff --git a/test/model_fixed_centres.cpp b/test/model_fixed_centres.cpp index 7a5d33a3a..151971b39 100644 --- a/test/model_fixed_centres.cpp +++ b/test/model_fixed_centres.cpp @@ -102,7 +102,6 @@ TEST_CASE("basic") s, "en", {model::fixed_centres_energy(kw::Gconst = 1.02, kw::masses = {1.01}, kw::positions = {1., 2., 3.})}, kw::vars = {"x"_var, "y"_var, "z"_var, "vx"_var, "vy"_var, "vz"_var}); - s.optimise(); s.compile(); auto *cf @@ -144,7 +143,6 @@ TEST_CASE("basic") REQUIRE(dc.size() == 626u); - s.optimise(); s.compile(); auto *cf diff --git a/test/model_mascon.cpp b/test/model_mascon.cpp index 
e82f34910..629d17495 100644 --- a/test/model_mascon.cpp +++ b/test/model_mascon.cpp @@ -330,7 +330,6 @@ TEST_CASE("basic cmp") REQUIRE(dc1.size() == 27u); REQUIRE(dc2.size() == 30u); - s.optimise(); s.compile(); auto *cf diff --git a/test/model_nbody.cpp b/test/model_nbody.cpp index 1a7bd984a..f84084ec2 100644 --- a/test/model_nbody.cpp +++ b/test/model_nbody.cpp @@ -96,7 +96,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 146u); - s.optimise(); s.compile(); double en_out = 0; @@ -132,7 +131,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 114u); - s.optimise(); s.compile(); double en_out = 0; @@ -179,7 +177,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 146u); - s.optimise(); s.compile(); double en_out = 0; @@ -228,7 +225,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 114u); - s.optimise(); s.compile(); double en_out = 0; @@ -279,7 +275,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 146u); - s.optimise(); s.compile(); double en_out = 0; @@ -331,7 +326,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 114u); - s.optimise(); s.compile(); double en_out = 0; @@ -378,7 +372,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 124u); - s.optimise(); s.compile(); double en_out = 0; @@ -423,7 +416,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 37u); - s.optimise(); s.compile(); double en_out = 0; @@ -511,7 +503,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 158u); - s.optimise(); s.compile(); double en_out = 0; @@ -547,7 +538,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 123u); - s.optimise(); s.compile(); double en_out = 0; @@ -583,7 +573,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 158u); - s.optimise(); s.compile(); double en_out = 0; @@ -619,7 +608,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 123u); - s.optimise(); s.compile(); double en_out = 0; @@ -657,7 +645,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 158u); - s.optimise(); s.compile(); double en_out = 0; @@ -696,7 +683,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 123u); - s.optimise(); s.compile(); double en_out = 0; @@ -730,7 +716,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 136u); - s.optimise(); s.compile(); double en_out = 0; @@ -768,7 +753,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 141u); - s.optimise(); s.compile(); double en_out = 0; @@ -803,7 +787,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 31u); - s.optimise(); s.compile(); double en_out = 0; diff --git a/test/model_rotating.cpp b/test/model_rotating.cpp index 29370568e..209841b15 100644 --- a/test/model_rotating.cpp +++ b/test/model_rotating.cpp @@ -86,7 +86,6 @@ TEST_CASE("basic") REQUIRE(dc.size() == 19u); - s.optimise(); s.compile(); auto *cf @@ -128,7 +127,6 @@ TEST_CASE("basic") REQUIRE(dc.size() == 20u); - s.optimise(); s.compile(); auto *cf diff --git a/test/number.cpp b/test/number.cpp index 78870214b..d48e373b0 100644 --- a/test/number.cpp +++ b/test/number.cpp @@ -367,8 +367,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -395,8 +393,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -422,8 +418,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -452,8 +446,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -480,8 +472,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = 
reinterpret_cast(s.jit_lookup("test")); @@ -507,8 +497,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -539,8 +527,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -567,8 +553,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -594,8 +578,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -624,8 +606,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -652,8 +632,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -679,8 +657,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -709,8 +685,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -743,8 +717,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); diff --git a/test/poly_enclosures.cpp b/test/poly_enclosures.cpp index fc596eaf1..1bbbc6fb0 100644 --- a/test/poly_enclosures.cpp +++ b/test/poly_enclosures.cpp @@ -140,9 +140,6 @@ TEST_CASE("polynomial enclosures") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -284,9 +281,6 @@ TEST_CASE("polynomial enclosures mp") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); diff --git a/test/prod.cpp b/test/prod.cpp index 18f1674b0..0524b470f 100644 --- a/test/prod.cpp +++ b/test/prod.cpp @@ -582,7 +582,6 @@ TEST_CASE("prod split") } } - ls.optimise(); ls.compile(); auto *cf_ptr diff --git a/test/sum.cpp b/test/sum.cpp index 1dd407ce8..9b61b22e6 100644 --- a/test/sum.cpp +++ b/test/sum.cpp @@ -459,7 +459,6 @@ TEST_CASE("sum split") } } - ls.optimise(); ls.compile(); auto *cf_ptr diff --git a/test/sum_sq.cpp b/test/sum_sq.cpp index 4586fd17e..01cf23f1d 100644 --- a/test/sum_sq.cpp +++ b/test/sum_sq.cpp @@ -262,7 +262,6 @@ TEST_CASE("cfunc") const auto dc = add_cfunc(s, "cfunc", {sum_sq({x, y, cos(sum_sq({x, y}))})}); - s.optimise(); s.compile(); auto *cf_ptr = reinterpret_cast(