From ec3392d2cbcdcdad2c597f3d25ed901a41e77254 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 14 Aug 2023 10:32:56 +0200 Subject: [PATCH 01/13] Roll the optimisation pass into llvm_state::compile(), fix llvm_state copying/des11n not handling the module name correctly. --- benchmark/large_cfunc.cpp | 1 - benchmark/penc_comparison.cpp | 3 - include/heyoka/taylor.hpp | 2 +- src/detail/event_detection.cpp | 6 -- src/expression_cfunc.cpp | 3 - src/llvm_state.cpp | 54 ++++++++++++++-- src/taylor_00.cpp | 61 ------------------ src/taylor_01.cpp | 15 +---- src/taylor_02.cpp | 3 - test/event_detection.cpp | 1 - test/event_detection_mp.cpp | 4 -- test/expression_diff_tensors.cpp | 5 -- test/llvm_helpers.cpp | 104 ------------------------------- test/llvm_state.cpp | 29 ++++++--- test/model_fixed_centres.cpp | 2 - test/model_mascon.cpp | 1 - test/model_nbody.cpp | 17 ----- test/model_rotating.cpp | 2 - test/number.cpp | 28 --------- test/poly_enclosures.cpp | 6 -- test/prod.cpp | 1 - test/sum.cpp | 1 - test/sum_sq.cpp | 1 - 23 files changed, 72 insertions(+), 278 deletions(-) diff --git a/benchmark/large_cfunc.cpp b/benchmark/large_cfunc.cpp index c70c9f51d..cc9fdb069 100644 --- a/benchmark/large_cfunc.cpp +++ b/benchmark/large_cfunc.cpp @@ -56,7 +56,6 @@ int main() add_cfunc(s, "en", {en}, kw::vars = {x, y, z, vx, vy, vz}, kw::compact_mode = true); - s.optimise(); s.compile(); [[maybe_unused]] auto fn = s.jit_lookup("en"); diff --git a/benchmark/penc_comparison.cpp b/benchmark/penc_comparison.cpp index e1d2dcb94..f602b23e6 100644 --- a/benchmark/penc_comparison.cpp +++ b/benchmark/penc_comparison.cpp @@ -133,9 +133,6 @@ void run_benchmark(unsigned order) // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); diff --git a/include/heyoka/taylor.hpp b/include/heyoka/taylor.hpp index 3602e51c7..5aeca1825 100644 --- a/include/heyoka/taylor.hpp +++ b/include/heyoka/taylor.hpp @@ -134,7 +134,7 @@ taylor_c_diff_func_name_args(llvm::LLVMContext &, llvm::Type *, const std::strin // Add a function for computing the dense output // via polynomial evaluation. void taylor_add_d_out_function(llvm_state &, llvm::Type *, std::uint32_t, std::uint32_t, std::uint32_t, bool, - bool = true, bool = true); + bool = true); } // namespace detail diff --git a/src/detail/event_detection.cpp b/src/detail/event_detection.cpp index d1eb1de47..c6af77b14 100644 --- a/src/detail/event_detection.cpp +++ b/src/detail/event_detection.cpp @@ -911,9 +911,6 @@ taylor_adaptive::ed_data::ed_data(llvm_state s, std::vector tes, s // Add the function for the fast exclusion check. detail::llvm_add_fex_check(m_state, fp_t, order, 1); - // Run the optimisation pass. - m_state.optimise(); - // Compile. m_state.compile(); @@ -1548,9 +1545,6 @@ taylor_adaptive_batch::ed_data::ed_data(llvm_state s, std::vector // NOTE: the fast exclusion check is vectorised. detail::llvm_add_fex_check(m_state, fp_t, order, batch_size); - // Run the optimisation pass. - m_state.optimise(); - // Compile. m_state.compile(); diff --git a/src/expression_cfunc.cpp b/src/expression_cfunc.cpp index 34b6798b9..f825a7431 100644 --- a/src/expression_cfunc.cpp +++ b/src/expression_cfunc.cpp @@ -1722,9 +1722,6 @@ auto add_cfunc_impl(llvm_state &s, const std::string &name, const F &fn, std::ui // Restore the original insertion block. builder.SetInsertPoint(orig_bb); - // Run the optimisation pass. - s.optimise(); - return dc; } diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index d8f03125f..86cc96f7b 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -518,7 +518,11 @@ void llvm_state_add_obj_to_jit(Jit &j, const std::string &obj) } // Helper to create an LLVM module from a IR in string representation. -auto llvm_state_ir_to_module(const std::string &ir, llvm::LLVMContext &ctx) +// NOTE: the module name needs to be passed explicitly (although it is already +// contained in the IR) because apparently llvm::parseIR() discards the module +// name when parsing. +// NOLINTNEXTLINE(bugprone-easily-swappable-parameters) +auto llvm_state_ir_to_module(const std::string &module_name, const std::string &ir, llvm::LLVMContext &ctx) { // Create the corresponding memory buffer. auto mb = llvm::MemoryBuffer::getMemBuffer(ir); @@ -538,6 +542,9 @@ auto llvm_state_ir_to_module(const std::string &ir, llvm::LLVMContext &ctx) } // LCOV_EXCL_STOP + // Set the module name. + ret->setModuleIdentifier(module_name); + return ret; } @@ -594,7 +601,7 @@ llvm_state::llvm_state(const llvm_state &other) const auto other_ir = other.get_ir(); // Create the module from the IR. - m_module = detail::llvm_state_ir_to_module(other_ir, context()); + m_module = detail::llvm_state_ir_to_module(m_module_name, other_ir, context()); // Create a new builder for the module. m_builder = std::make_unique(context()); @@ -606,7 +613,24 @@ llvm_state::llvm_state(const llvm_state &other) // NOTE: compilation will take care of setting up m_ir_snapshot. // If no compilation happens, m_ir_snapshot is left empty after init. if (other.is_compiled()) { + // NOTE: we need to temporarily disable optimisations + // before compilation, for the following reason. + // + // Recall that here we are in the case + // in which the 'other' llvm_state had been compiled, but + // no object code had been produced yet. This means the IR + // had already been optimised, and by running another optimisation + // pass now (indirectly, via compile()) we might end + // up modifying the already-optimised IR. + // By temporarily setting m_opt_level to zero, we are preventing + // any modification to the IR and ensuring that, after copying, + // we have exactly reproduced the original llvm_state object. + const auto orig_opt_level = m_opt_level; + m_opt_level = 0; + compile(); + + m_opt_level = orig_opt_level; } } } @@ -774,7 +798,7 @@ void llvm_state::load_impl(Archive &ar, unsigned version) m_ir_snapshot.clear(); // Create the module from the IR. - m_module = detail::llvm_state_ir_to_module(ir, context()); + m_module = detail::llvm_state_ir_to_module(m_module_name, ir, context()); // Create a new builder for the module. m_builder = std::make_unique(context()); @@ -787,7 +811,24 @@ void llvm_state::load_impl(Archive &ar, unsigned version) // If no compilation happens, m_ir_snapshot is left empty after // clearing earlier. if (cmp) { + // NOTE: we need to temporarily disable optimisations + // before compilation, for the following reason. + // + // Recall that here we are in the case + // in which the serialised llvm_state had been compiled, but + // no object code had been produced yet. This means the IR + // had already been optimised, and by running another optimisation + // pass (indirectly, via compile()) now we might end + // up modifying the already-optimised IR. + // By temporarily setting m_opt_level to zero, we are preventing + // any modification to the IR and ensuring that, after deserialisation, + // we have exactly reproduced the original llvm_state object. + const auto orig_opt_level = m_opt_level; + m_opt_level = 0; + compile(); + + m_opt_level = orig_opt_level; } } // LCOV_EXCL_START @@ -1133,6 +1174,8 @@ void llvm_state::optimise() } } +// NOTE: we need to emphasise in the docs that compilation +// triggers an optimisation pass. void llvm_state::compile() { check_uncompiled(__func__); @@ -1149,7 +1192,10 @@ void llvm_state::compile() } try { - // Store a snapshot of the IR before compiling. + // Run the optimisation pass. + optimise(); + + // Store a snapshot of the optimised IR before compiling. m_ir_snapshot = get_ir(); // Add the module (this will clear out m_module). diff --git a/src/taylor_00.cpp b/src/taylor_00.cpp index adc849d07..d5a92dc17 100644 --- a/src/taylor_00.cpp +++ b/src/taylor_00.cpp @@ -105,29 +105,6 @@ std::uint32_t n_pars_in_dc(const taylor_dc_t &dc) namespace { -// RAII helper to temporarily set the opt level to 0 in an llvm_state. -struct opt_disabler { - llvm_state *m_s; - unsigned m_orig_opt_level; - - explicit opt_disabler(llvm_state &s) : m_s(&s), m_orig_opt_level(s.opt_level()) - { - // Disable optimisations. - m_s->opt_level() = 0; - } - - opt_disabler(const opt_disabler &) = delete; - opt_disabler(opt_disabler &&) noexcept = delete; - opt_disabler &operator=(const opt_disabler &) = delete; - opt_disabler &operator=(opt_disabler &&) noexcept = delete; - - ~opt_disabler() - { - // Restore the original optimisation level. - m_s->opt_level() = m_orig_opt_level; - } -}; - // Helper to determine the optimal Taylor order for a given tolerance, // following Jorba's prescription. // NOTE: when T is mppp::real and tol has a low precision, the use @@ -293,11 +270,6 @@ auto taylor_add_adaptive_step_with_events(llvm_state &s, const std::string &name // Verify the function. s.verify_function(f); - // Run the optimisation pass. - // NOTE: this does nothing currently, as the optimisation - // level is set to zero from the outside. - s.optimise(); - return std::tuple{std::move(dc), order}; } @@ -468,11 +440,6 @@ auto taylor_add_adaptive_step(llvm_state &s, const std::string &name, const U &s // Verify the function. s.verify_function(f); - // Run the optimisation pass. - // NOTE: this does nothing currently, as the optimisation - // level is set to zero from the outside. - s.optimise(); - return std::tuple{std::move(dc), order}; } @@ -714,11 +681,6 @@ void taylor_adaptive::finalise_ctor_impl(const U &sys, std::vector state, // Do we have events? const auto with_events = !tes.empty() || !ntes.empty(); - // Temporarily disable optimisations in s, so that - // we don't optimise twice when adding the step - // and then the d_out. - std::optional od(m_llvm); - // Add the stepper function. if (with_events) { std::vector ee; @@ -770,15 +732,6 @@ void taylor_adaptive::finalise_ctor_impl(const U &sys, std::vector state, detail::get_logger()->trace("Taylor dense output runtime: {}", sw); sw.reset(); - // Restore the original optimisation level in s. - od.reset(); - - // Run the optimisation pass manually. - m_llvm.optimise(); - - detail::get_logger()->trace("Taylor global opt pass runtime: {}", sw); - sw.reset(); - // Run the jit. m_llvm.compile(); @@ -2397,11 +2350,6 @@ void taylor_adaptive_batch::finalise_ctor_impl(const U &sys, std::vector s // Do we have events? const auto with_events = !tes.empty() || !ntes.empty(); - // Temporarily disable optimisations in s, so that - // we don't optimise twice when adding the step - // and then the d_out. - std::optional od(m_llvm); - // Add the stepper function. if (with_events) { std::vector ee; @@ -2450,15 +2398,6 @@ void taylor_adaptive_batch::finalise_ctor_impl(const U &sys, std::vector s detail::get_logger()->trace("Taylor batch dense output runtime: {}", sw); sw.reset(); - // Restore the original optimisation level in s. - od.reset(); - - // Run the optimisation pass manually. - m_llvm.optimise(); - - detail::get_logger()->trace("Taylor batch global opt pass runtime: {}", sw); - sw.reset(); - // Run the jit. m_llvm.compile(); diff --git a/src/taylor_01.cpp b/src/taylor_01.cpp index 984c06168..1131975b7 100644 --- a/src/taylor_01.cpp +++ b/src/taylor_01.cpp @@ -1733,7 +1733,7 @@ template class t_event_impl; // Add a function for computing the dense output // via polynomial evaluation. void taylor_add_d_out_function(llvm_state &s, llvm::Type *fp_scal_t, std::uint32_t n_eq, std::uint32_t order, - std::uint32_t batch_size, bool high_accuracy, bool external_linkage, bool optimise) + std::uint32_t batch_size, bool high_accuracy, bool external_linkage) { // LCOV_EXCL_START assert(n_eq > 0u); @@ -1904,11 +1904,6 @@ void taylor_add_d_out_function(llvm_state &s, llvm::Type *fp_scal_t, std::uint32 // Verify the function. s.verify_function(f); - - // Run the optimisation pass, if requested. - if (optimise) { - s.optimise(); - } } } // namespace detail @@ -1960,7 +1955,7 @@ void continuous_output::add_c_out_function(std::uint32_t order, std::uint32_t // Add the function for the computation of the dense output. // NOTE: the dense output function operates on data in external format. - detail::taylor_add_d_out_function(m_llvm_state, fp_t, dim, order, 1, high_accuracy, false, false); + detail::taylor_add_d_out_function(m_llvm_state, fp_t, dim, order, 1, high_accuracy, false); // Fetch it. auto *d_out_f = md.getFunction("d_out_f"); @@ -2152,9 +2147,6 @@ void continuous_output::add_c_out_function(std::uint32_t order, std::uint32_t // Verify the function. m_llvm_state.verify_function(f); - // Run the optimisation pass. - m_llvm_state.optimise(); - // Compile. m_llvm_state.compile(); @@ -2835,9 +2827,6 @@ void continuous_output_batch::add_c_out_function(std::uint32_t order, std::ui // Verify the function. m_llvm_state.verify_function(f); - // Run the optimisation pass. - m_llvm_state.optimise(); - // Compile. m_llvm_state.compile(); diff --git a/src/taylor_02.cpp b/src/taylor_02.cpp index 7b3c85733..2bb8231c4 100644 --- a/src/taylor_02.cpp +++ b/src/taylor_02.cpp @@ -2023,9 +2023,6 @@ auto taylor_add_jet_impl(llvm_state &s, const std::string &name, const U &sys, s // Restore the original insertion block. builder.SetInsertPoint(orig_bb); - // Run the optimisation pass. - s.optimise(); - return dc; } diff --git a/test/event_detection.cpp b/test/event_detection.cpp index a2adf93ac..c062aaadd 100644 --- a/test/event_detection.cpp +++ b/test/event_detection.cpp @@ -110,7 +110,6 @@ TEST_CASE("fast exclusion check") // Add the function and fetch it. detail::llvm_add_fex_check(s, detail::to_llvm_type(s.context()), order, batch_size, use_cs); - s.optimise(); s.compile(); auto fex_check = reinterpret_cast(s.jit_lookup("fex_check")); diff --git a/test/event_detection_mp.cpp b/test/event_detection_mp.cpp index f82bd7ea5..c5eece9cc 100644 --- a/test/event_detection_mp.cpp +++ b/test/event_detection_mp.cpp @@ -67,8 +67,6 @@ TEST_CASE("poly translator 1") detail::add_poly_translator_1(s, detail::llvm_type_like(s, input[0]), 5, 1); - s.optimise(); - s.compile(); auto *pt1 = reinterpret_cast(s.jit_lookup("poly_translate_1")); @@ -100,8 +98,6 @@ TEST_CASE("poly csc") detail::llvm_add_csc(s, detail::llvm_type_like(s, input[0]), 5, 1); - s.optimise(); - s.compile(); auto *pt1 = reinterpret_cast( diff --git a/test/expression_diff_tensors.cpp b/test/expression_diff_tensors.cpp index aa8b26881..eb4be7f6f 100644 --- a/test/expression_diff_tensors.cpp +++ b/test/expression_diff_tensors.cpp @@ -415,7 +415,6 @@ TEST_CASE("fixed centres check") llvm_state s; add_cfunc(s, "diff", diff_vec, kw::vars = vars); - s.optimise(); s.compile(); auto *fr = reinterpret_cast( @@ -462,7 +461,6 @@ TEST_CASE("fixed centres check") // Compile and fetch the expression of the derivative. llvm_state s2; add_cfunc(s2, "diff", {ex}, kw::vars = vars); - s2.optimise(); s2.compile(); auto *fr2 = reinterpret_cast( @@ -514,7 +512,6 @@ TEST_CASE("speelpenning check") llvm_state s; add_cfunc(s, "diff", diff_vec, kw::vars = vars); - s.optimise(); s.compile(); auto *fr = reinterpret_cast( @@ -556,7 +553,6 @@ TEST_CASE("speelpenning check") // Compile and fetch the expression of the derivative. llvm_state s2; add_cfunc(s2, "diff", {ex}, kw::vars = vars); - s2.optimise(); s2.compile(); auto *fr2 = reinterpret_cast( @@ -629,7 +625,6 @@ TEST_CASE("speelpenning complexity") REQUIRE(std::get(dc_reverse[i].value()).extract()->args().size() == 2u); } - s.optimise(); s.compile(); } } diff --git a/test/llvm_helpers.cpp b/test/llvm_helpers.cpp index 193a51de9..226ebbf93 100644 --- a/test/llvm_helpers.cpp +++ b/test/llvm_helpers.cpp @@ -99,9 +99,6 @@ TEST_CASE("sgn scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -153,9 +150,6 @@ TEST_CASE("sgn scalar mp") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -225,9 +219,6 @@ TEST_CASE("sgn batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -296,9 +287,6 @@ TEST_CASE("sincos scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -363,9 +351,6 @@ TEST_CASE("sincos batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -434,9 +419,6 @@ TEST_CASE("sincos mp") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -484,9 +466,6 @@ TEST_CASE("modulus scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -549,9 +528,6 @@ TEST_CASE("modulus batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -597,9 +573,6 @@ TEST_CASE("inv_kep_E_scalar") // Add the function. llvm_add_inv_kep_E_wrapper(s, detail::to_llvm_type(s.context()), 1, "hey_kep"); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -745,9 +718,6 @@ TEST_CASE("inv_kep_E_batch") // Add the function. llvm_add_inv_kep_E_wrapper(s, detail::to_llvm_type(s.context()), batch_size, "hey_kep"); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -986,9 +956,6 @@ TEST_CASE("inv_kep_E_scalar mp") // Add the function. llvm_add_inv_kep_E_wrapper(s, fp_t, 1, "hey_kep"); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1152,9 +1119,6 @@ TEST_CASE("while_loop") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1271,8 +1235,6 @@ TEST_CASE("csc_scalar") llvm_add_csc(s, to_llvm_type(s.context()), degree, 1); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup( @@ -1347,8 +1309,6 @@ TEST_CASE("csc_batch") llvm_add_csc(s, to_llvm_type(s.context()), degree, batch_size); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup( @@ -1520,9 +1480,6 @@ TEST_CASE("minmax") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1667,9 +1624,6 @@ TEST_CASE("fma scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1728,9 +1682,6 @@ TEST_CASE("fma batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1803,9 +1754,6 @@ TEST_CASE("fma scalar mp") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1869,9 +1817,6 @@ TEST_CASE("eft_product scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -1954,9 +1899,6 @@ TEST_CASE("eft_product batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2053,9 +1995,6 @@ TEST_CASE("dl mul scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2161,9 +2100,6 @@ TEST_CASE("dl mul batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); // Fetch the function pointer. @@ -2280,9 +2216,6 @@ TEST_CASE("dl div scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2384,9 +2317,6 @@ TEST_CASE("dl div batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); // Fetch the function pointer. @@ -2484,9 +2414,6 @@ TEST_CASE("floor scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2540,9 +2467,6 @@ TEST_CASE("floor batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2611,9 +2535,6 @@ TEST_CASE("dl floor scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2701,9 +2622,6 @@ TEST_CASE("dl floor batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2800,9 +2718,6 @@ TEST_CASE("dl modulus scalar") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2888,9 +2803,6 @@ TEST_CASE("dl modulus batch") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -2988,8 +2900,6 @@ TEST_CASE("to_size_t") builder.CreateRet(to_size_t(s, in_val)); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3021,8 +2931,6 @@ TEST_CASE("to_size_t") builder.CreateRetVoid(); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3057,8 +2965,6 @@ TEST_CASE("to_size_t") builder.CreateRet(to_size_t(s, in_val)); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3090,8 +2996,6 @@ TEST_CASE("to_size_t") builder.CreateRetVoid(); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3139,8 +3043,6 @@ TEST_CASE("real_ext_load") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3182,8 +3084,6 @@ TEST_CASE("switch") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3216,8 +3116,6 @@ TEST_CASE("switch") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -3252,8 +3150,6 @@ TEST_CASE("switch") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); diff --git a/test/llvm_state.cpp b/test/llvm_state.cpp index c3753313b..17d3b7777 100644 --- a/test/llvm_state.cpp +++ b/test/llvm_state.cpp @@ -89,12 +89,15 @@ TEST_CASE("copy semantics") taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); + const auto orig_ir = s.get_ir(); + auto s2 = s; REQUIRE(s2.module_name() == "sample state"); REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(!s2.is_compiled()); + REQUIRE(s2.get_ir() == orig_ir); s2.compile(); @@ -118,12 +121,15 @@ TEST_CASE("copy semantics") s.compile(); + const auto orig_ir = s.get_ir(); + auto s2 = s; REQUIRE(s2.module_name() == "sample state"); REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(s2.is_compiled()); + REQUIRE(s2.get_ir() == orig_ir); auto jptr = reinterpret_cast(s2.jit_lookup("jet")); @@ -147,12 +153,15 @@ TEST_CASE("copy semantics") auto jptr = reinterpret_cast(s.jit_lookup("jet")); + const auto orig_ir = s.get_ir(); + auto s2 = s; REQUIRE(s2.module_name() == "sample state"); REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(s2.is_compiled()); + REQUIRE(s2.get_ir() == orig_ir); jptr = reinterpret_cast(s2.jit_lookup("jet")); @@ -200,7 +209,7 @@ TEST_CASE("s11n") { std::stringstream ss; - llvm_state s; + llvm_state s{kw::mname = "foo"}; const auto orig_ir = s.get_ir(); @@ -220,7 +229,7 @@ TEST_CASE("s11n") REQUIRE(!s.is_compiled()); REQUIRE(s.get_ir() == orig_ir); - REQUIRE(s.module_name() == ""); + REQUIRE(s.module_name() == "foo"); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); REQUIRE(s.force_avx512() == false); @@ -230,14 +239,14 @@ TEST_CASE("s11n") { std::stringstream ss; - llvm_state s{kw::force_avx512 = true}; + llvm_state s{kw::force_avx512 = true, kw::mname = "foo"}; taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); - const auto orig_ir = s.get_ir(); - s.compile(); + const auto orig_ir = s.get_ir(); + { boost::archive::binary_oarchive oa(ss); @@ -253,8 +262,8 @@ TEST_CASE("s11n") } REQUIRE(s.is_compiled()); + REQUIRE(s.module_name() == "foo"); REQUIRE(s.get_ir() == orig_ir); - REQUIRE(s.module_name() == ""); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); REQUIRE(s.force_avx512() == true); @@ -264,14 +273,14 @@ TEST_CASE("s11n") { std::stringstream ss; - llvm_state s; + llvm_state s{kw::mname = "foo"}; taylor_add_jet(s, "jet", {-1_dbl, x + y}, 1, 1, true, false); - const auto orig_ir = s.get_ir(); - s.compile(); + const auto orig_ir = s.get_ir(); + s.jit_lookup("jet"); { @@ -290,7 +299,7 @@ TEST_CASE("s11n") REQUIRE(s.is_compiled()); REQUIRE(s.get_ir() == orig_ir); - REQUIRE(s.module_name() == ""); + REQUIRE(s.module_name() == "foo"); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); diff --git a/test/model_fixed_centres.cpp b/test/model_fixed_centres.cpp index 7a5d33a3a..151971b39 100644 --- a/test/model_fixed_centres.cpp +++ b/test/model_fixed_centres.cpp @@ -102,7 +102,6 @@ TEST_CASE("basic") s, "en", {model::fixed_centres_energy(kw::Gconst = 1.02, kw::masses = {1.01}, kw::positions = {1., 2., 3.})}, kw::vars = {"x"_var, "y"_var, "z"_var, "vx"_var, "vy"_var, "vz"_var}); - s.optimise(); s.compile(); auto *cf @@ -144,7 +143,6 @@ TEST_CASE("basic") REQUIRE(dc.size() == 626u); - s.optimise(); s.compile(); auto *cf diff --git a/test/model_mascon.cpp b/test/model_mascon.cpp index e82f34910..629d17495 100644 --- a/test/model_mascon.cpp +++ b/test/model_mascon.cpp @@ -330,7 +330,6 @@ TEST_CASE("basic cmp") REQUIRE(dc1.size() == 27u); REQUIRE(dc2.size() == 30u); - s.optimise(); s.compile(); auto *cf diff --git a/test/model_nbody.cpp b/test/model_nbody.cpp index 1a7bd984a..f84084ec2 100644 --- a/test/model_nbody.cpp +++ b/test/model_nbody.cpp @@ -96,7 +96,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 146u); - s.optimise(); s.compile(); double en_out = 0; @@ -132,7 +131,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 114u); - s.optimise(); s.compile(); double en_out = 0; @@ -179,7 +177,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 146u); - s.optimise(); s.compile(); double en_out = 0; @@ -228,7 +225,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 114u); - s.optimise(); s.compile(); double en_out = 0; @@ -279,7 +275,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 146u); - s.optimise(); s.compile(); double en_out = 0; @@ -331,7 +326,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 114u); - s.optimise(); s.compile(); double en_out = 0; @@ -378,7 +372,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 124u); - s.optimise(); s.compile(); double en_out = 0; @@ -423,7 +416,6 @@ TEST_CASE("nbody") REQUIRE(dc.size() == 37u); - s.optimise(); s.compile(); double en_out = 0; @@ -511,7 +503,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 158u); - s.optimise(); s.compile(); double en_out = 0; @@ -547,7 +538,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 123u); - s.optimise(); s.compile(); double en_out = 0; @@ -583,7 +573,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 158u); - s.optimise(); s.compile(); double en_out = 0; @@ -619,7 +608,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 123u); - s.optimise(); s.compile(); double en_out = 0; @@ -657,7 +645,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 158u); - s.optimise(); s.compile(); double en_out = 0; @@ -696,7 +683,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 123u); - s.optimise(); s.compile(); double en_out = 0; @@ -730,7 +716,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 136u); - s.optimise(); s.compile(); double en_out = 0; @@ -768,7 +753,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 141u); - s.optimise(); s.compile(); double en_out = 0; @@ -803,7 +787,6 @@ TEST_CASE("np1body") REQUIRE(dc.size() == 31u); - s.optimise(); s.compile(); double en_out = 0; diff --git a/test/model_rotating.cpp b/test/model_rotating.cpp index 29370568e..209841b15 100644 --- a/test/model_rotating.cpp +++ b/test/model_rotating.cpp @@ -86,7 +86,6 @@ TEST_CASE("basic") REQUIRE(dc.size() == 19u); - s.optimise(); s.compile(); auto *cf @@ -128,7 +127,6 @@ TEST_CASE("basic") REQUIRE(dc.size() == 20u); - s.optimise(); s.compile(); auto *cf diff --git a/test/number.cpp b/test/number.cpp index 78870214b..d48e373b0 100644 --- a/test/number.cpp +++ b/test/number.cpp @@ -367,8 +367,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -395,8 +393,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -422,8 +418,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -452,8 +446,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -480,8 +472,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -507,8 +497,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -539,8 +527,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -567,8 +553,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -594,8 +578,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -624,8 +606,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -652,8 +632,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -679,8 +657,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -709,8 +685,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); @@ -743,8 +717,6 @@ TEST_CASE("llvm_codegen") s.verify_function(f); - s.optimise(); - s.compile(); auto f_ptr = reinterpret_cast(s.jit_lookup("test")); diff --git a/test/poly_enclosures.cpp b/test/poly_enclosures.cpp index fc596eaf1..1bbbc6fb0 100644 --- a/test/poly_enclosures.cpp +++ b/test/poly_enclosures.cpp @@ -140,9 +140,6 @@ TEST_CASE("polynomial enclosures") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); @@ -284,9 +281,6 @@ TEST_CASE("polynomial enclosures mp") // Verify. s.verify_function(f); - // Run the optimisation pass. - s.optimise(); - // Compile. s.compile(); diff --git a/test/prod.cpp b/test/prod.cpp index 18f1674b0..0524b470f 100644 --- a/test/prod.cpp +++ b/test/prod.cpp @@ -582,7 +582,6 @@ TEST_CASE("prod split") } } - ls.optimise(); ls.compile(); auto *cf_ptr diff --git a/test/sum.cpp b/test/sum.cpp index 1dd407ce8..9b61b22e6 100644 --- a/test/sum.cpp +++ b/test/sum.cpp @@ -459,7 +459,6 @@ TEST_CASE("sum split") } } - ls.optimise(); ls.compile(); auto *cf_ptr diff --git a/test/sum_sq.cpp b/test/sum_sq.cpp index 4586fd17e..01cf23f1d 100644 --- a/test/sum_sq.cpp +++ b/test/sum_sq.cpp @@ -262,7 +262,6 @@ TEST_CASE("cfunc") const auto dc = add_cfunc(s, "cfunc", {sum_sq({x, y, cos(sum_sq({x, y}))})}); - s.optimise(); s.compile(); auto *cf_ptr = reinterpret_cast( From 6a768ffc23b9fdbf03f1d03c1eb51c542f628ebe Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 14 Aug 2023 10:43:36 +0200 Subject: [PATCH 02/13] Restore the original insertion block when exiting taylor_add_d_out_function(). --- src/taylor_01.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/taylor_01.cpp b/src/taylor_01.cpp index 1131975b7..f3c4590f9 100644 --- a/src/taylor_01.cpp +++ b/src/taylor_01.cpp @@ -1747,6 +1747,9 @@ void taylor_add_d_out_function(llvm_state &s, llvm::Type *fp_scal_t, std::uint32 // Fetch the external type corresponding to fp_scal_t. auto *ext_fp_scal_t = llvm_ext_type(fp_scal_t); + // Fetch the current insertion block. + auto *orig_bb = builder.GetInsertBlock(); + // The function arguments: // - the output pointer (read/write, used also for accumulation), // - the pointer to the Taylor coefficients (read-only), @@ -1904,6 +1907,9 @@ void taylor_add_d_out_function(llvm_state &s, llvm::Type *fp_scal_t, std::uint32 // Verify the function. s.verify_function(f); + + // Restore the original insertion block. + builder.SetInsertPoint(orig_bb); } } // namespace detail From 7764b35413f9cea091819e8b8aee5bed62b333c4 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 14 Aug 2023 10:49:25 +0200 Subject: [PATCH 03/13] Update changelog. --- doc/changelog.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/doc/changelog.rst b/doc/changelog.rst index e44b04d43..24e5aa011 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,6 +1,24 @@ Changelog ========= +1.0.1 (unreleased) +------------------ + +Changes +~~~~~~~ + +- The optimisation pass in an ``llvm_state`` is now automatically + called during compilation + (`#337 `__). + +Fix +~~~ + +- Fix LLVM module name not being preserved during + copy and deserialisation of ``llvm_state`` + (`#337 `__). +- Fix broken link in the docs. + 1.0.0 (2023-08-10) ------------------ From 6c2a29492f08780458db1b76f0e8de54ea84bf9a Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 14 Aug 2023 10:51:06 +0200 Subject: [PATCH 04/13] Bump to 1.1.0 for development. --- CMakeLists.txt | 4 ++-- doc/changelog.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7ceb87ca..bf7ffdf12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ if(NOT CMAKE_BUILD_TYPE) FORCE) endif() -project(heyoka VERSION 1.0.0 LANGUAGES CXX C) +project(heyoka VERSION 1.1.0 LANGUAGES CXX C) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/yacma") @@ -293,7 +293,7 @@ if(HEYOKA_WITH_SLEEF) endif() # Setup the heyoka ABI version number. -set(HEYOKA_ABI_VERSION 22) +set(HEYOKA_ABI_VERSION 23) if(HEYOKA_BUILD_STATIC_LIBRARY) # Setup of the heyoka static library. diff --git a/doc/changelog.rst b/doc/changelog.rst index 24e5aa011..a80087c7c 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -1,7 +1,7 @@ Changelog ========= -1.0.1 (unreleased) +1.1.0 (unreleased) ------------------ Changes From 3b71075e44ddd383dc66b7e23dfce4a6975e6ed4 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 14 Aug 2023 11:37:58 +0200 Subject: [PATCH 05/13] Fix changelog. --- doc/changelog.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index a80087c7c..92079cbc9 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -9,14 +9,14 @@ Changes - The optimisation pass in an ``llvm_state`` is now automatically called during compilation - (`#337 `__). + (`#339 `__). Fix ~~~ - Fix LLVM module name not being preserved during copy and deserialisation of ``llvm_state`` - (`#337 `__). + (`#339 `__). - Fix broken link in the docs. 1.0.0 (2023-08-10) From 8500104c5f1aa647a75d4cd269a13abc9ba4ce48 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 15 Aug 2023 10:11:05 +0200 Subject: [PATCH 06/13] Clean up a bit the kwargs ctor for llvm_state. --- include/heyoka/llvm_state.hpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/include/heyoka/llvm_state.hpp b/include/heyoka/llvm_state.hpp index 2e6cb0554..d672184fd 100644 --- a/include/heyoka/llvm_state.hpp +++ b/include/heyoka/llvm_state.hpp @@ -210,19 +210,20 @@ class HEYOKA_DLL_PUBLIC llvm_state // end of a constructor. HEYOKA_DLL_LOCAL void ctor_setup_math_flags(); -public: - llvm_state(); - // NOTE: enable the kwargs ctor only if: + // Meta-programming for the kwargs ctor. Enabled if: // - there is at least 1 argument (i.e., cannot act as a def ctor), // - if there is only 1 argument, it cannot be of type llvm_state // (so that it does not interfere with copy/move ctors). - template 0u) - && (sizeof...(KwArgs) > 1u - || std::conjunction_v, llvm_state>>...>), - int> - = 0> + template + using kwargs_ctor_enabler = std::enable_if_t< + (sizeof...(KwArgs) > 0u) + && (sizeof...(KwArgs) > 1u + || std::conjunction_v, llvm_state>>...>), + int>; + +public: + llvm_state(); + template = 0> explicit llvm_state(KwArgs &&...kw_args) : llvm_state(kw_args_ctor_impl(std::forward(kw_args)...)) { } From 5c5d51e7947907adb321e9e6985fe2639a2342ab Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 15 Aug 2023 15:07:58 +0200 Subject: [PATCH 07/13] In the llvm_state::jit structure, keep around a pointer to the parent llvm_state. --- src/llvm_state.cpp | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index 86cc96f7b..33048ee4b 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -310,6 +310,9 @@ std::uint32_t recommended_simd_size() // Implementation of the jit class. struct llvm_state::jit { + // NOTE: this is the llvm_state containing + // the jit instance. + const llvm_state *m_state = nullptr; std::unique_ptr m_lljit; std::unique_ptr m_tm; std::unique_ptr m_ctx; @@ -318,7 +321,7 @@ struct llvm_state::jit { #endif std::optional m_object_file; - jit() + explicit jit(const llvm_state *state) : m_state(state) { // Ensure the native target is inited. detail::init_native_target(); @@ -414,6 +417,7 @@ struct llvm_state::jit { #endif } + jit() = delete; jit(const jit &) = delete; jit(jit &&) = delete; jit &operator=(const jit &) = delete; @@ -553,7 +557,7 @@ auto llvm_state_ir_to_module(const std::string &module_name, const std::string & } // namespace detail llvm_state::llvm_state(std::tuple &&tup) - : m_jitter(std::make_unique()), m_opt_level(std::get<1>(tup)), m_fast_math(std::get<2>(tup)), + : m_jitter(std::make_unique(this)), m_opt_level(std::get<1>(tup)), m_fast_math(std::get<2>(tup)), m_force_avx512(std::get<3>(tup)), m_module_name(std::move(std::get<0>(tup))) { // Create the module. @@ -577,7 +581,7 @@ llvm_state::llvm_state(const llvm_state &other) // NOTE: start off by: // - creating a new jit, // - copying over the options from other. - : m_jitter(std::make_unique()), m_opt_level(other.m_opt_level), m_fast_math(other.m_fast_math), + : m_jitter(std::make_unique(this)), m_opt_level(other.m_opt_level), m_fast_math(other.m_fast_math), m_force_avx512(other.m_force_avx512), m_module_name(other.m_module_name) { if (other.is_compiled() && other.m_jitter->m_object_file) { @@ -635,7 +639,16 @@ llvm_state::llvm_state(const llvm_state &other) } } -llvm_state::llvm_state(llvm_state &&) noexcept = default; +// NOTE: this needs to be implemented manually as we need +// to set up correctly the m_state member of the jit instance. +llvm_state::llvm_state(llvm_state &&other) noexcept + : m_jitter(std::move(other.m_jitter)), m_module(std::move(other.m_module)), m_builder(std::move(other.m_builder)), + m_opt_level(other.m_opt_level), m_ir_snapshot(std::move(other.m_ir_snapshot)), m_fast_math(other.m_fast_math), + m_force_avx512(other.m_force_avx512), m_module_name(std::move(other.m_module_name)) +{ + // Set up m_state. + m_jitter->m_state = this; +} llvm_state &llvm_state::operator=(const llvm_state &other) { @@ -650,6 +663,7 @@ llvm_state &llvm_state::operator=(const llvm_state &other) // needs to be done in a different order (specifically, we need to // ensure that the LLVM objects in this are destroyed in a specific // order). +// NOTE: we also need to set up correctly the m_state member of the jit instance. llvm_state &llvm_state::operator=(llvm_state &&other) noexcept { if (this != &other) { @@ -658,6 +672,9 @@ llvm_state &llvm_state::operator=(llvm_state &&other) noexcept m_module = std::move(other.m_module); m_jitter = std::move(other.m_jitter); + // Set up m_state. + m_jitter->m_state = this; + // The remaining bits. m_opt_level = other.m_opt_level; m_ir_snapshot = std::move(other.m_ir_snapshot); @@ -669,7 +686,14 @@ llvm_state &llvm_state::operator=(llvm_state &&other) noexcept return *this; } -llvm_state::~llvm_state() = default; +llvm_state::~llvm_state() +{ + // NOTE: if this has not been moved-from, ensure + // the m_state member of the jit is pointing to this. + if (m_jitter) { + assert(m_jitter->m_state == this); + } +} // NOTE: the save/load logic is essentially the same as in the // copy constructor. Specifically, we have 2 different paths @@ -782,7 +806,7 @@ void llvm_state::load_impl(Archive &ar, unsigned version) m_builder.reset(); // Reset the jit with a new one. - m_jitter = std::make_unique(); + m_jitter = std::make_unique(this); if (cmp && with_obj) { // Assign the ir snapshot. From df5b8695e71fadfab88bdcccd08a299af4deea78 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Wed, 16 Aug 2023 10:40:19 +0200 Subject: [PATCH 08/13] Store the bitcode in llvm_state, in addition to the IR. Also, use the bitcode (instead of the IR) for s11n purposes. --- include/heyoka/llvm_state.hpp | 10 +- src/detail/llvm_helpers.cpp | 2 +- src/llvm_state.cpp | 184 +++++++++++++++++++++++----------- 3 files changed, 131 insertions(+), 65 deletions(-) diff --git a/include/heyoka/llvm_state.hpp b/include/heyoka/llvm_state.hpp index d672184fd..fd05287d9 100644 --- a/include/heyoka/llvm_state.hpp +++ b/include/heyoka/llvm_state.hpp @@ -133,6 +133,7 @@ class HEYOKA_DLL_PUBLIC llvm_state std::unique_ptr m_builder; unsigned m_opt_level; std::string m_ir_snapshot; + std::string m_bc_snapshot; bool m_fast_math; bool m_force_avx512; std::string m_module_name; @@ -247,6 +248,7 @@ class HEYOKA_DLL_PUBLIC llvm_state [[nodiscard]] bool force_avx512() const; [[nodiscard]] std::string get_ir() const; + [[nodiscard]] std::string get_bc() const; void dump_object_code(const std::string &) const; [[nodiscard]] const std::string &get_object_code() const; @@ -256,6 +258,7 @@ class HEYOKA_DLL_PUBLIC llvm_state void optimise(); [[nodiscard]] bool is_compiled() const; + [[nodiscard]] bool has_object_code() const; void compile(); @@ -266,9 +269,10 @@ class HEYOKA_DLL_PUBLIC llvm_state HEYOKA_END_NAMESPACE -// Current archive version is 2. Changelog: +// Archive version changelog: // - version 1: got rid of the inline_functions setting; -// - version 2: added the force_avx512 setting. -BOOST_CLASS_VERSION(heyoka::llvm_state, 2) +// - version 2: added the force_avx512 setting; +// - version 3: added the bitcode snapshot. +BOOST_CLASS_VERSION(heyoka::llvm_state, 3) #endif diff --git a/src/detail/llvm_helpers.cpp b/src/detail/llvm_helpers.cpp index 799882260..cdcf02a9d 100644 --- a/src/detail/llvm_helpers.cpp +++ b/src/detail/llvm_helpers.cpp @@ -939,7 +939,7 @@ std::string llvm_type_name(llvm::Type *t) t->print(ostr, false, true); - return ostr.str(); + return retval; } // This function will return true if: diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index 33048ee4b..d23f3ddac 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include #include @@ -463,7 +465,7 @@ struct llvm_state::jit { ostr << err; throw std::invalid_argument(fmt::format( - "The function for adding a module to the jit failed. The full error message:\n{}", ostr.str())); + "The function for adding a module to the jit failed. The full error message:\n{}", err_report)); } // LCOV_EXCL_STOP } @@ -516,40 +518,42 @@ void llvm_state_add_obj_to_jit(Jit &j, const std::string &obj) ostr << err; throw std::invalid_argument(fmt::format( - "The function for adding a compiled module to the jit failed. The full error message:\n{}", ostr.str())); + "The function for adding a compiled module to the jit failed. The full error message:\n{}", err_report)); } // LCOV_EXCL_STOP } -// Helper to create an LLVM module from a IR in string representation. +// Helper to create an LLVM module from bitcode. // NOTE: the module name needs to be passed explicitly (although it is already -// contained in the IR) because apparently llvm::parseIR() discards the module +// contained in the bitcode) because apparently llvm::parseBitcodeFile() discards the module // name when parsing. // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -auto llvm_state_ir_to_module(const std::string &module_name, const std::string &ir, llvm::LLVMContext &ctx) +auto llvm_state_bc_to_module(const std::string &module_name, const std::string &bc, llvm::LLVMContext &ctx) { // Create the corresponding memory buffer. - auto mb = llvm::MemoryBuffer::getMemBuffer(ir); + auto mb = llvm::MemoryBuffer::getMemBuffer(bc); + assert(mb); - // Construct a new module from the parsed IR. - llvm::SMDiagnostic err; - auto ret = llvm::parseIR(*mb, err, ctx); + // Parse the bitcode. + auto ret = llvm::parseBitcodeFile(mb->getMemBufferRef(), ctx); // LCOV_EXCL_START if (!ret) { + const auto err = ret.takeError(); std::string err_report; llvm::raw_string_ostream ostr(err_report); - err.print("", ostr); + ostr << err; - throw std::invalid_argument(fmt::format("IR parsing failed. The full error message:\n{}", ostr.str())); + throw std::invalid_argument( + fmt::format("LLVM bitcode parsing failed. The full error message:\n{}", err_report)); } // LCOV_EXCL_STOP // Set the module name. - ret->setModuleIdentifier(module_name); + ret.get()->setModuleIdentifier(module_name); - return ret; + return std::move(ret.get()); } } // namespace @@ -587,25 +591,31 @@ llvm_state::llvm_state(const llvm_state &other) if (other.is_compiled() && other.m_jitter->m_object_file) { // 'other' was compiled and code was generated. // We leave module and builder empty, copy over the - // IR snapshot and add the cached compiled module + // IR/bitcode snapshots and add the cached compiled module // to the jit. m_ir_snapshot = other.m_ir_snapshot; + m_bc_snapshot = other.m_bc_snapshot; + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) detail::llvm_state_add_obj_to_jit(*m_jitter, *other.m_jitter->m_object_file); } else { // 'other' has not been compiled yet, or // it has been compiled but no code has been // lazily generated yet. - // We will fetch its IR and reconstruct + // We will fetch its bitcode and reconstruct // module and builder. - // Get the IR of other. - // NOTE: this works regardless of the compiled - // status of other. - const auto other_ir = other.get_ir(); + // Is other compiled? + const auto other_cmp = other.is_compiled(); - // Create the module from the IR. - m_module = detail::llvm_state_ir_to_module(m_module_name, other_ir, context()); + // Create the module from the bitcode. + // NOTE: branch to avoid expensive copy if other + // has been compiled. + if (other_cmp) { + m_module = detail::llvm_state_bc_to_module(m_module_name, other.m_bc_snapshot, context()); + } else { + m_module = detail::llvm_state_bc_to_module(m_module_name, other.get_bc(), context()); + } // Create a new builder for the module. m_builder = std::make_unique(context()); @@ -614,16 +624,16 @@ llvm_state::llvm_state(const llvm_state &other) ctor_setup_math_flags(); // Compile if needed. - // NOTE: compilation will take care of setting up m_ir_snapshot. - // If no compilation happens, m_ir_snapshot is left empty after init. - if (other.is_compiled()) { + // NOTE: compilation will take care of setting up m_ir_snapshot/m_bc_snapshot. + // If no compilation happens, m_ir_snapshot/m_bc_snapshot are left empty after init. + if (other_cmp) { // NOTE: we need to temporarily disable optimisations // before compilation, for the following reason. // // Recall that here we are in the case - // in which the 'other' llvm_state had been compiled, but - // no object code had been produced yet. This means the IR - // had already been optimised, and by running another optimisation + // in which the 'other' llvm_state has been compiled, but + // no object code has been produced yet. This means the IR + // has already been optimised, and by running another optimisation // pass now (indirectly, via compile()) we might end // up modifying the already-optimised IR. // By temporarily setting m_opt_level to zero, we are preventing @@ -634,6 +644,7 @@ llvm_state::llvm_state(const llvm_state &other) compile(); + // Restore the original optimisation level. m_opt_level = orig_opt_level; } } @@ -643,7 +654,8 @@ llvm_state::llvm_state(const llvm_state &other) // to set up correctly the m_state member of the jit instance. llvm_state::llvm_state(llvm_state &&other) noexcept : m_jitter(std::move(other.m_jitter)), m_module(std::move(other.m_module)), m_builder(std::move(other.m_builder)), - m_opt_level(other.m_opt_level), m_ir_snapshot(std::move(other.m_ir_snapshot)), m_fast_math(other.m_fast_math), + m_opt_level(other.m_opt_level), m_ir_snapshot(std::move(other.m_ir_snapshot)), + m_bc_snapshot(std::move(other.m_bc_snapshot)), m_fast_math(other.m_fast_math), m_force_avx512(other.m_force_avx512), m_module_name(std::move(other.m_module_name)) { // Set up m_state. @@ -678,6 +690,7 @@ llvm_state &llvm_state::operator=(llvm_state &&other) noexcept // The remaining bits. m_opt_level = other.m_opt_level; m_ir_snapshot = std::move(other.m_ir_snapshot); + m_bc_snapshot = std::move(other.m_bc_snapshot); m_fast_math = other.m_fast_math; m_force_avx512 = other.m_force_avx512; m_module_name = std::move(other.m_module_name); @@ -695,10 +708,6 @@ llvm_state::~llvm_state() } } -// NOTE: the save/load logic is essentially the same as in the -// copy constructor. Specifically, we have 2 different paths -// depending on whether the state is compiled AND object -// code was generated. template void llvm_state::save_impl(Archive &ar, unsigned) const { @@ -719,14 +728,14 @@ void llvm_state::save_impl(Archive &ar, unsigned) const ar << m_force_avx512; ar << m_module_name; - // Store the IR. - // NOTE: avoid get_ir() if the module has been compiled, + // Store the bitcode. + // NOTE: avoid get_bc() if the module has been compiled, // and use the snapshot directly, so that we don't make // a useless copy. if (cmp) { - ar << m_ir_snapshot; + ar << m_bc_snapshot; } else { - ar << get_ir(); + ar << get_bc(); } if (with_obj) { @@ -734,6 +743,15 @@ void llvm_state::save_impl(Archive &ar, unsigned) const // NOLINTNEXTLINE(bugprone-unchecked-optional-access) ar << *m_jitter->m_object_file; } + + // Save a copy of the IR snapshot if the state + // is compiled and binary code was emitted. + // NOTE: we want this because otherwise we would + // need to re-parse the bitcode during des11n to + // restore the IR snapshot. + if (cmp && with_obj) { + ar << m_ir_snapshot; + } } template @@ -783,9 +801,9 @@ void llvm_state::load_impl(Archive &ar, unsigned version) std::string module_name; ar >> module_name; - // Load the ir - std::string ir; - ar >> ir; + // Load the bitcode. + std::string bc_snapshot; + ar >> bc_snapshot; // Recover the object file, if available. std::optional obj_file; @@ -794,6 +812,12 @@ void llvm_state::load_impl(Archive &ar, unsigned version) ar >> *obj_file; } + // Recover the IR snapshot, if available. + std::string ir_snapshot; + if (cmp && with_obj) { + ar >> ir_snapshot; + } + try { // Set the config options. m_opt_level = opt_level; @@ -809,20 +833,22 @@ void llvm_state::load_impl(Archive &ar, unsigned version) m_jitter = std::make_unique(this); if (cmp && with_obj) { - // Assign the ir snapshot. - m_ir_snapshot = std::move(ir); + // Assign the snapshots. + m_ir_snapshot = std::move(ir_snapshot); + m_bc_snapshot = std::move(bc_snapshot); // Add the object code to the jit. // NOLINTNEXTLINE(bugprone-unchecked-optional-access) detail::llvm_state_add_obj_to_jit(*m_jitter, *obj_file); } else { - // Clear the existing ir snapshot - // (it will be replaced with the - // actual ir if compilation is needed). + // Clear the existing snapshots + // (they will be replaced with the + // actual ir/bitcode if compilation is needed). m_ir_snapshot.clear(); + m_bc_snapshot.clear(); - // Create the module from the IR. - m_module = detail::llvm_state_ir_to_module(m_module_name, ir, context()); + // Create the module from the bitcode. + m_module = detail::llvm_state_bc_to_module(m_module_name, bc_snapshot, context()); // Create a new builder for the module. m_builder = std::make_unique(context()); @@ -831,8 +857,8 @@ void llvm_state::load_impl(Archive &ar, unsigned version) ctor_setup_math_flags(); // Compile if needed. - // NOTE: compilation will take care of setting up m_ir_snapshot. - // If no compilation happens, m_ir_snapshot is left empty after + // NOTE: compilation will take care of setting up m_ir_snapshot/m_bc_snapshot. + // If no compilation happens, m_ir_snapshot/m_bc_snapshot are left empty after // clearing earlier. if (cmp) { // NOTE: we need to temporarily disable optimisations @@ -852,6 +878,7 @@ void llvm_state::load_impl(Archive &ar, unsigned version) compile(); + // Restore the original optimisation level. m_opt_level = orig_opt_level; } } @@ -962,7 +989,7 @@ void llvm_state::verify_function(llvm::Function *f) f->eraseFromParent(); throw std::invalid_argument(fmt::format( - "The verification of the function '{}' failed. The full error message:\n{}", fname, ostr.str())); + "The verification of the function '{}' failed. The full error message:\n{}", fname, err_report)); } } @@ -1211,7 +1238,7 @@ void llvm_state::compile() if (llvm::verifyModule(*m_module, &ostr)) { throw std::runtime_error( - fmt::format("The verification of the module '{}' produced an error:\n{}", m_module_name, ostr.str())); + fmt::format("The verification of the module '{}' produced an error:\n{}", m_module_name, out)); } } @@ -1219,8 +1246,9 @@ void llvm_state::compile() // Run the optimisation pass. optimise(); - // Store a snapshot of the optimised IR before compiling. + // Store a snapshot of the optimised IR and bitcode before compiling. m_ir_snapshot = get_ir(); + m_bc_snapshot = get_bc(); // Add the module (this will clear out m_module). m_jitter->add_module(std::move(m_module)); @@ -1243,6 +1271,11 @@ bool llvm_state::is_compiled() const return !m_module; } +bool llvm_state::has_object_code() const +{ + return static_cast(m_jitter->m_object_file); +} + // NOTE: this function will lookup symbol names, // so it does not necessarily return a function // pointer (could be, e.g., a global variable). @@ -1269,8 +1302,10 @@ std::string llvm_state::get_ir() const // get the IR from it. std::string out; llvm::raw_string_ostream ostr(out); + m_module->print(ostr, nullptr); - return ostr.str(); + + return out; } else { // The module has been compiled. // Return the IR snapshot that @@ -1279,6 +1314,25 @@ std::string llvm_state::get_ir() const } } +std::string llvm_state::get_bc() const +{ + if (m_module) { + // The module has not been compiled yet, + // get the bitcode from it. + std::string out; + llvm::raw_string_ostream ostr(out); + + llvm::WriteBitcodeToFile(*m_module, ostr); + + return out; + } else { + // The module has been compiled. + // Return the bitcode snapshot that + // was created before the compilation. + return m_bc_snapshot; + } +} + // LCOV_EXCL_START void llvm_state::dump_object_code(const std::string &filename) const @@ -1331,15 +1385,23 @@ std::ostream &operator<<(std::ostream &os, const llvm_state &s) std::ostringstream oss; oss << std::boolalpha; - oss << "Module name : " << s.m_module_name << '\n'; - oss << "Compiled : " << s.is_compiled() << '\n'; - oss << "Fast math : " << s.m_fast_math << '\n'; - oss << "Force AVX512 : " << s.m_force_avx512 << '\n'; - oss << "Optimisation level : " << s.m_opt_level << '\n'; - oss << "Target triple : " << s.m_jitter->get_target_triple().str() << '\n'; - oss << "Target CPU : " << s.m_jitter->get_target_cpu() << '\n'; - oss << "Target features : " << s.m_jitter->get_target_features() << '\n'; - oss << "IR size : " << s.get_ir().size() << '\n'; + oss << "Module name : " << s.m_module_name << '\n'; + oss << "Compiled : " << s.is_compiled() << '\n'; + oss << "Has object code : " << s.has_object_code() << '\n'; + oss << "Fast math : " << s.m_fast_math << '\n'; + oss << "Force AVX512 : " << s.m_force_avx512 << '\n'; + oss << "Optimisation level: " << s.m_opt_level << '\n'; + oss << "Data layout : " << s.m_jitter->m_lljit->getDataLayout().getStringRepresentation() << '\n'; + oss << "Target triple : " << s.m_jitter->get_target_triple().str() << '\n'; + oss << "Target CPU : " << s.m_jitter->get_target_cpu() << '\n'; + oss << "Target features : " << s.m_jitter->get_target_features() << '\n'; + oss << "Bitcode size : "; + + if (s.is_compiled()) { + oss << s.m_bc_snapshot.size() << '\n'; + } else { + oss << s.get_bc().size() << '\n'; + } return os << oss.str(); } From 0f03b04c2b63c5c51239372ab3f8e95b0d7193f7 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Wed, 16 Aug 2023 13:39:15 +0200 Subject: [PATCH 09/13] Fix object file not being properly copied/deserialised. --- src/llvm_state.cpp | 22 +++++++++++++++++++--- test/llvm_state.cpp | 26 ++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index d23f3ddac..32bfc30ef 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -358,10 +359,18 @@ struct llvm_state::jit { // when it is lazily generated. m_lljit->getObjTransformLayer().setTransform([this](std::unique_ptr obj_buffer) { assert(obj_buffer); - assert(!m_object_file); - // Copy obj_buffer to the local m_object_file member. - m_object_file.emplace(obj_buffer->getBufferStart(), obj_buffer->getBufferEnd()); + // NOTE: this callback will be invoked the first time a jit lookup is performed, + // even if the object code was copied from another llvm_state or + // loaded from an archive. In such a case, m_object_file has already been set up properly and we just sanity + // check in debug mode that the content of m_object_file matches the content of obj_buffer. + if (m_object_file) { + assert(obj_buffer->getBufferSize() == m_object_file->size()); + assert(std::equal(obj_buffer->getBufferStart(), obj_buffer->getBufferEnd(), m_object_file->begin())); + } else { + // Copy obj_buffer to the local m_object_file member. + m_object_file.emplace(obj_buffer->getBufferStart(), obj_buffer->getBufferEnd()); + } return llvm::Expected>(std::move(obj_buffer)); }); @@ -521,6 +530,13 @@ void llvm_state_add_obj_to_jit(Jit &j, const std::string &obj) "The function for adding a compiled module to the jit failed. The full error message:\n{}", err_report)); } // LCOV_EXCL_STOP + + // Add the object code also to the + // m_object_file member. + // NOTE: this function at the moment is used when m_object_file + // is supposed to be empty. + assert(!j.m_object_file); + j.m_object_file.emplace(obj); } // Helper to create an LLVM module from bitcode. diff --git a/test/llvm_state.cpp b/test/llvm_state.cpp index 17d3b7777..7a3dd8d67 100644 --- a/test/llvm_state.cpp +++ b/test/llvm_state.cpp @@ -82,14 +82,16 @@ TEST_CASE("copy semantics") llvm_state s{kw::mname = "sample state", kw::opt_level = 2u, kw::fast_math = true}; + taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); + REQUIRE(s.module_name() == "sample state"); REQUIRE(s.opt_level() == 2u); REQUIRE(s.fast_math()); REQUIRE(!s.is_compiled()); - - taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); + REQUIRE(!s.has_object_code()); const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); auto s2 = s; @@ -97,7 +99,10 @@ TEST_CASE("copy semantics") REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(!s2.is_compiled()); + REQUIRE(!s2.has_object_code()); + REQUIRE(s2.get_ir() == orig_ir); + REQUIRE(s2.get_bc() == orig_bc); s2.compile(); @@ -122,6 +127,7 @@ TEST_CASE("copy semantics") s.compile(); const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); auto s2 = s; @@ -129,7 +135,10 @@ TEST_CASE("copy semantics") REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(s2.is_compiled()); + REQUIRE(!s2.has_object_code()); + REQUIRE(s2.get_ir() == orig_ir); + REQUIRE(s2.get_bc() == orig_bc); auto jptr = reinterpret_cast(s2.jit_lookup("jet")); @@ -154,6 +163,7 @@ TEST_CASE("copy semantics") auto jptr = reinterpret_cast(s.jit_lookup("jet")); const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); auto s2 = s; @@ -161,7 +171,10 @@ TEST_CASE("copy semantics") REQUIRE(s2.opt_level() == 2u); REQUIRE(s2.fast_math()); REQUIRE(s2.is_compiled()); + REQUIRE(s2.has_object_code()); + REQUIRE(s2.get_ir() == orig_ir); + REQUIRE(s2.get_bc() == orig_bc); jptr = reinterpret_cast(s2.jit_lookup("jet")); @@ -212,6 +225,7 @@ TEST_CASE("s11n") llvm_state s{kw::mname = "foo"}; const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); { boost::archive::binary_oarchive oa(ss); @@ -228,7 +242,9 @@ TEST_CASE("s11n") } REQUIRE(!s.is_compiled()); + REQUIRE(!s.has_object_code()); REQUIRE(s.get_ir() == orig_ir); + REQUIRE(s.get_bc() == orig_bc); REQUIRE(s.module_name() == "foo"); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); @@ -246,6 +262,7 @@ TEST_CASE("s11n") s.compile(); const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); { boost::archive::binary_oarchive oa(ss); @@ -262,8 +279,10 @@ TEST_CASE("s11n") } REQUIRE(s.is_compiled()); + REQUIRE(!s.has_object_code()); REQUIRE(s.module_name() == "foo"); REQUIRE(s.get_ir() == orig_ir); + REQUIRE(s.get_bc() == orig_bc); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); REQUIRE(s.force_avx512() == true); @@ -280,6 +299,7 @@ TEST_CASE("s11n") s.compile(); const auto orig_ir = s.get_ir(); + const auto orig_bc = s.get_bc(); s.jit_lookup("jet"); @@ -298,7 +318,9 @@ TEST_CASE("s11n") } REQUIRE(s.is_compiled()); + REQUIRE(s.has_object_code()); REQUIRE(s.get_ir() == orig_ir); + REQUIRE(s.get_bc() == orig_bc); REQUIRE(s.module_name() == "foo"); REQUIRE(s.opt_level() == 3u); REQUIRE(s.fast_math() == false); From 0bb8ce2716167438def068e31de4b600d801dbac Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Wed, 16 Aug 2023 14:03:05 +0200 Subject: [PATCH 10/13] Remove unused includes, update changelog. --- doc/changelog.rst | 14 ++++++++++++++ src/llvm_state.cpp | 2 -- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index 92079cbc9..ceb634266 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -4,9 +4,20 @@ Changelog 1.1.0 (unreleased) ------------------ +New +~~~ + +- It is now possible to get the LLVM bitcode of + an ``llvm_state`` + (`#339 `__). + Changes ~~~~~~~ +- The LLVM bitcode is now used internally (instead of the textual + representation of the IR) when copying and serialising + an ``llvm_state`` + (`#339 `__). - The optimisation pass in an ``llvm_state`` is now automatically called during compilation (`#339 `__). @@ -14,6 +25,9 @@ Changes Fix ~~~ +- Fix the object file of an ``llvm_state`` not being + preserved during copy and deserialisation + (`#339 `__). - Fix LLVM module name not being preserved during copy and deserialisation of ``llvm_state`` (`#339 `__). diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index 32bfc30ef..318586a5d 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -62,11 +62,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include From 563bc0800b493c57bea97dd12c6c52e074ce2d05 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Wed, 16 Aug 2023 15:53:18 +0200 Subject: [PATCH 11/13] Fix for LLVM 10. --- src/detail/llvm_helpers.cpp | 2 +- src/llvm_state.cpp | 14 +++++++------- test/llvm_state.cpp | 3 +++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/detail/llvm_helpers.cpp b/src/detail/llvm_helpers.cpp index cdcf02a9d..27d08bdd0 100644 --- a/src/detail/llvm_helpers.cpp +++ b/src/detail/llvm_helpers.cpp @@ -939,7 +939,7 @@ std::string llvm_type_name(llvm::Type *t) t->print(ostr, false, true); - return retval; + return std::move(ostr.str()); } // This function will return true if: diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index 318586a5d..7c603cf7b 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -472,7 +472,7 @@ struct llvm_state::jit { ostr << err; throw std::invalid_argument(fmt::format( - "The function for adding a module to the jit failed. The full error message:\n{}", err_report)); + "The function for adding a module to the jit failed. The full error message:\n{}", ostr.str())); } // LCOV_EXCL_STOP } @@ -525,7 +525,7 @@ void llvm_state_add_obj_to_jit(Jit &j, const std::string &obj) ostr << err; throw std::invalid_argument(fmt::format( - "The function for adding a compiled module to the jit failed. The full error message:\n{}", err_report)); + "The function for adding a compiled module to the jit failed. The full error message:\n{}", ostr.str())); } // LCOV_EXCL_STOP @@ -560,7 +560,7 @@ auto llvm_state_bc_to_module(const std::string &module_name, const std::string & ostr << err; throw std::invalid_argument( - fmt::format("LLVM bitcode parsing failed. The full error message:\n{}", err_report)); + fmt::format("LLVM bitcode parsing failed. The full error message:\n{}", ostr.str())); } // LCOV_EXCL_STOP @@ -1003,7 +1003,7 @@ void llvm_state::verify_function(llvm::Function *f) f->eraseFromParent(); throw std::invalid_argument(fmt::format( - "The verification of the function '{}' failed. The full error message:\n{}", fname, err_report)); + "The verification of the function '{}' failed. The full error message:\n{}", fname, ostr.str())); } } @@ -1252,7 +1252,7 @@ void llvm_state::compile() if (llvm::verifyModule(*m_module, &ostr)) { throw std::runtime_error( - fmt::format("The verification of the module '{}' produced an error:\n{}", m_module_name, out)); + fmt::format("The verification of the module '{}' produced an error:\n{}", m_module_name, ostr.str())); } } @@ -1319,7 +1319,7 @@ std::string llvm_state::get_ir() const m_module->print(ostr, nullptr); - return out; + return std::move(ostr.str()); } else { // The module has been compiled. // Return the IR snapshot that @@ -1338,7 +1338,7 @@ std::string llvm_state::get_bc() const llvm::WriteBitcodeToFile(*m_module, ostr); - return out; + return std::move(ostr.str()); } else { // The module has been compiled. // Return the bitcode snapshot that diff --git a/test/llvm_state.cpp b/test/llvm_state.cpp index 7a3dd8d67..476b9b33d 100644 --- a/test/llvm_state.cpp +++ b/test/llvm_state.cpp @@ -67,6 +67,9 @@ TEST_CASE("empty state") std::cout << s << '\n'; std::cout << s.get_ir() << '\n'; + REQUIRE(!s.get_bc().empty()); + REQUIRE(!s.get_ir().empty()); + // Print also some info on the FP types. std::cout << "Double digits : " << std::numeric_limits::digits << '\n'; std::cout << "Long double digits: " << std::numeric_limits::digits << '\n'; From 378d03947570cae293abe1fca7686065c5538360 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Wed, 16 Aug 2023 15:58:26 +0200 Subject: [PATCH 12/13] Improve coverage. --- test/llvm_state.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/llvm_state.cpp b/test/llvm_state.cpp index 476b9b33d..2e369b675 100644 --- a/test/llvm_state.cpp +++ b/test/llvm_state.cpp @@ -127,8 +127,19 @@ TEST_CASE("copy semantics") taylor_add_jet(s, "jet", {x * y, y * x}, 1, 1, true, false); + // On-the-fly testing for string repr. + std::ostringstream oss; + oss << s; + const auto orig_repr = oss.str(); + s.compile(); + oss.str(""); + oss << s; + const auto compiled_repr = oss.str(); + + REQUIRE(orig_repr != compiled_repr); + const auto orig_ir = s.get_ir(); const auto orig_bc = s.get_bc(); From eb86d2405edf385290f60be1149c521d55f08f8a Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Thu, 17 Aug 2023 09:48:28 +0200 Subject: [PATCH 13/13] Small internal doc fix. --- src/llvm_state.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index 7c603cf7b..0f68c1892 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -359,9 +359,9 @@ struct llvm_state::jit { assert(obj_buffer); // NOTE: this callback will be invoked the first time a jit lookup is performed, - // even if the object code was copied from another llvm_state or - // loaded from an archive. In such a case, m_object_file has already been set up properly and we just sanity - // check in debug mode that the content of m_object_file matches the content of obj_buffer. + // even if the object code was manually injected via llvm_state_add_obj_to_jit() + // (e.g., during copy, des11n, etc.). In such a case, m_object_file has already been set up properly and we + // just sanity check in debug mode that the content of m_object_file matches the content of obj_buffer. if (m_object_file) { assert(obj_buffer->getBufferSize() == m_object_file->size()); assert(std::equal(obj_buffer->getBufferStart(), obj_buffer->getBufferEnd(), m_object_file->begin()));