diff --git a/doc/changelog.rst b/doc/changelog.rst index 695db4cb3..c05eca253 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -29,6 +29,10 @@ Changes Fix ~~~ +- Fix global constants in an LLVM module being generated in unordered fashion + when compact mode is active. This would result in two logically-identical + modules being considered different by the in-memory cache + (`#359 `__). - Fix compiler warning when building without SLEEF support (`#356 `__). - Improve the numerical stability of the VSOP2013 model diff --git a/include/heyoka/expression.hpp b/include/heyoka/expression.hpp index 8a781cfd8..9c833a1aa 100644 --- a/include/heyoka/expression.hpp +++ b/include/heyoka/expression.hpp @@ -632,11 +632,13 @@ HEYOKA_DLL_PUBLIC llvm::Function *taylor_c_diff_func(llvm_state &, llvm::Type *, std::uint32_t, bool); HEYOKA_DLL_PUBLIC std::uint32_t get_param_size(const expression &); +HEYOKA_DLL_PUBLIC std::uint32_t get_param_size(const std::vector &); HEYOKA_DLL_PUBLIC std::vector get_params(const expression &); HEYOKA_DLL_PUBLIC std::vector get_params(const std::vector &); HEYOKA_DLL_PUBLIC bool is_time_dependent(const expression &); +HEYOKA_DLL_PUBLIC bool is_time_dependent(const std::vector &); namespace detail { diff --git a/src/expression_basic.cpp b/src/expression_basic.cpp index 9a81a5a3f..c81c61203 100644 --- a/src/expression_basic.cpp +++ b/src/expression_basic.cpp @@ -1081,11 +1081,9 @@ std::uint32_t get_param_size(detail::funcptr_set &func_set, const expression &ex using type = uncvref_t; if constexpr (std::is_same_v) { - if (v.idx() == std::numeric_limits::max()) { - throw std::overflow_error("Overflow dected in get_n_param()"); - } + using safe_uint32_t = boost::safe_numerics::safe; - retval = std::max(static_cast(v.idx() + 1u), retval); + retval = std::max(static_cast(v.idx() + safe_uint32_t(1)), retval); } else if constexpr (std::is_same_v) { const auto f_id = v.get_ptr(); @@ -1124,6 +1122,19 @@ std::uint32_t get_param_size(const expression &ex) return detail::get_param_size(func_set, ex); } +std::uint32_t get_param_size(const std::vector &v_ex) +{ + std::uint32_t retval = 0; + + detail::funcptr_set func_set; + + for (const auto &ex : v_ex) { + retval = std::max(retval, detail::get_param_size(func_set, ex)); + } + + return retval; +} + namespace detail { @@ -1221,47 +1232,45 @@ namespace { // NOLINTNEXTLINE(misc-no-recursion) -bool is_time_dependent(funcptr_map &func_map, const expression &ex) +bool is_time_dependent(funcptr_set &func_set, const expression &ex) { // - If ex is a function, check if it is time-dependent, or // if any of its arguments is time-dependent, // - otherwise, return false. return std::visit( // NOLINTNEXTLINE(misc-no-recursion) - [&func_map](const auto &v) { + [&func_set](const auto &v) { using type = uncvref_t; if constexpr (std::is_same_v) { const auto f_id = v.get_ptr(); - // Did we already determine if v is time-dependent? - if (const auto it = func_map.find(f_id); it != func_map.end()) { - return it->second; + // Did we already determine that v is *not* time-dependent? + if (const auto it = func_set.find(f_id); it != func_set.end()) { + return false; } // Check if the function is intrinsically time-dependent. - bool is_tm_dep = v.is_time_dependent(); - if (!is_tm_dep) { - // The function does **not** intrinsically depend on time. - // Check its arguments. - for (const auto &a : v.args()) { - if (is_time_dependent(func_map, a)) { - // A time-dependent argument was found. Update - // is_tm_dep and break out, no point in checking - // the other arguments. - is_tm_dep = true; - break; - } + if (v.is_time_dependent()) { + return true; + } + + // The function does *not* intrinsically depend on time. + // Check its arguments. + for (const auto &a : v.args()) { + if (is_time_dependent(func_set, a)) { + // A time-dependent argument was found, return true. + return true; } } // Update the cache. - [[maybe_unused]] const auto [_, flag] = func_map.emplace(f_id, is_tm_dep); + [[maybe_unused]] const auto [_, flag] = func_set.emplace(f_id); // An expression cannot contain itself. assert(flag); - return is_tm_dep; + return false; } else { return false; } @@ -1276,9 +1285,24 @@ bool is_time_dependent(funcptr_map &func_map, const expression &ex) // Determine if an expression is time-dependent. bool is_time_dependent(const expression &ex) { - detail::funcptr_map func_map; + // NOTE: this will contain pointers to (sub)expressions + // which are *not* time-dependent. + detail::funcptr_set func_set; + + return detail::is_time_dependent(func_set, ex); +} + +bool is_time_dependent(const std::vector &v_ex) +{ + detail::funcptr_set func_set; + + for (const auto &ex : v_ex) { + if (detail::is_time_dependent(func_set, ex)) { + return true; + } + } - return detail::is_time_dependent(func_map, ex); + return false; } namespace detail diff --git a/src/expression_cfunc.cpp b/src/expression_cfunc.cpp index 768b44749..1214a48c5 100644 --- a/src/expression_cfunc.cpp +++ b/src/expression_cfunc.cpp @@ -1006,7 +1006,16 @@ auto cfunc_build_function_maps(llvm_state &s, llvm::Type *fp_t, const std::vecto // will contain {f : [[a, b, c], [d, e, f]]}. // After construction, we have verified that for each function // in the map the sets of arguments have all the same size. - fast_umap>>> tmp_map; + // NOTE: again, here and below we use name-based ordered maps for the functions. + // This ensures that the invocations of cm_make_arg_gen_*(), which create several + // global variables, always happen in a well-defined order. If we used an unordered map instead, + // the variables would be created in a "random" order, which would result in a + // unnecessary miss for the in-memory cache machinery when two logically-identical + // LLVM modules are considered different because of the difference in the order + // of declaration of global variables. + std::map>>, + llvm_func_name_compare> + tmp_map; for (const auto &ex : seg) { // Get the evaluation function. @@ -1021,14 +1030,14 @@ auto cfunc_build_function_maps(llvm_state &s, llvm::Type *fp_t, const std::vecto // element into a set of indices/constants. const auto c_args = udef_to_variants(ex, {}); + // LCOV_EXCL_START if (!is_new_func && it->second.back().size() - 1u != c_args.size()) { - // LCOV_EXCL_START throw std::invalid_argument( fmt::format("Inconsistent arity detected in a compiled function in compact " "mode: the same function is being called with both {} and {} arguments", it->second.back().size() - 1u, c_args.size())); - // LCOV_EXCL_STOP } + // LCOV_EXCL_STOP // Add the new set of arguments. it->second.emplace_back(); @@ -1042,7 +1051,8 @@ auto cfunc_build_function_maps(llvm_state &s, llvm::Type *fp_t, const std::vecto // Now we build the transposition of tmp_map: from {f : [[a, b, c], [d, e, f]]} // to {f : [[a, d], [b, e], [c, f]]}. - fast_umap, std::vector>>> + std::map, std::vector>>, + llvm_func_name_compare> tmp_map_transpose; for (const auto &[func, vv] : tmp_map) { assert(!vv.empty()); // LCOV_EXCL_LINE diff --git a/src/taylor_02.cpp b/src/taylor_02.cpp index 367dc9aad..b2b7df9e4 100644 --- a/src/taylor_02.cpp +++ b/src/taylor_02.cpp @@ -57,7 +57,6 @@ #endif #include -#include #include #include #include @@ -749,7 +748,16 @@ auto taylor_build_function_maps(llvm_state &s, llvm::Type *fp_t, const std::vect // will contain {f : [[a, b, c], [d, e, f]]}. // After construction, we have verified that for each function // in the map the sets of arguments have all the same size. - fast_umap>>> tmp_map; + // NOTE: again, here and below we use name-based ordered maps for the functions. + // This ensures that the invocations of cm_make_arg_gen_*(), which create several + // global variables, always happen in a well-defined order. If we used an unordered map instead, + // the variables would be created in a "random" order, which would result in a + // unnecessary miss for the in-memory cache machinery when two logically-identical + // LLVM modules are considered different because of the difference in the order + // of declaration of global variables. + std::map>>, + llvm_func_name_compare> + tmp_map; for (const auto &ex : seg) { // Get the function for the computation of the derivative. @@ -764,12 +772,14 @@ auto taylor_build_function_maps(llvm_state &s, llvm::Type *fp_t, const std::vect // element into a set of indices/constants. const auto cdiff_args = udef_to_variants(ex.first, ex.second); + // LCOV_EXCL_START if (!is_new_func && it->second.back().size() - 1u != cdiff_args.size()) { throw std::invalid_argument( fmt::format("Inconsistent arity detected in a Taylor derivative function in compact " "mode: the same function is being called with both {} and {} arguments", it->second.back().size() - 1u, cdiff_args.size())); } + // LCOV_EXCL_STOP // Add the new set of arguments. it->second.emplace_back(); @@ -783,7 +793,8 @@ auto taylor_build_function_maps(llvm_state &s, llvm::Type *fp_t, const std::vect // Now we build the transposition of tmp_map: from {f : [[a, b, c], [d, e, f]]} // to {f : [[a, d], [b, e], [c, f]]}. - fast_umap, std::vector>>> + std::map, std::vector>>, + llvm_func_name_compare> tmp_map_transpose; for (const auto &[func, vv] : tmp_map) { assert(!vv.empty()); // LCOV_EXCL_LINE diff --git a/test/expression.cpp b/test/expression.cpp index bd39f7722..830de1c30 100644 --- a/test/expression.cpp +++ b/test/expression.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -331,8 +332,7 @@ TEST_CASE("get_param_size") REQUIRE(get_param_size(par[0]) == 1u); REQUIRE(get_param_size(par[123]) == 124u); - REQUIRE_THROWS_MATCHES(get_param_size(par[std::numeric_limits::max()]), std::overflow_error, - Message("Overflow dected in get_n_param()")); + REQUIRE_THROWS_AS(get_param_size(par[std::numeric_limits::max()]), std::system_error); REQUIRE(get_param_size(par[123] + "x"_var) == 124u); REQUIRE(get_param_size("x"_var + par[123]) == 124u); REQUIRE(get_param_size(par[123] + 1_dbl) == 124u); @@ -362,6 +362,15 @@ TEST_CASE("get_param_size") bar = par[23] + (foo - x) / (2. * foo) + par[92]; REQUIRE(get_param_size(bar) == 93u); + + // Tests with vectorisation. + REQUIRE(get_param_size({par[0], par[123]}) == 124u); + REQUIRE(get_param_size({par[123], par[0]}) == 124u); + REQUIRE(get_param_size({bar, bar + par[86]}) == 93u); + REQUIRE(get_param_size({bar, bar + par[186]}) == 187u); + REQUIRE(get_param_size({bar + par[86], bar}) == 93u); + REQUIRE(get_param_size({bar + par[186], bar}) == 187u); + REQUIRE(get_param_size({"x"_var, 2_dbl}) == 0u); } TEST_CASE("binary simpls") @@ -557,6 +566,19 @@ TEST_CASE("is_time_dependent") bar = hy::time + (foo - x) / (2. * foo) + hy::time; REQUIRE(is_time_dependent(bar)); + + // Tests with vectorization. + REQUIRE(is_time_dependent({bar})); + REQUIRE(is_time_dependent({x, bar})); + REQUIRE(is_time_dependent({bar, x})); + REQUIRE(!is_time_dependent({2_dbl - par[0], x})); + + foo = ((x + y) * (z + x)) * ((z - x) * (y + x)); + bar = (foo - x) / (2. * foo); + REQUIRE(!is_time_dependent({x, bar})); + REQUIRE(is_time_dependent({x, bar, hy::time})); + REQUIRE(is_time_dependent({x, hy::time, bar})); + REQUIRE(is_time_dependent({hy::time, x, bar})); } TEST_CASE("s11n") diff --git a/test/llvm_state_mem_cache.cpp b/test/llvm_state_mem_cache.cpp index 97960422b..839a3f2fd 100644 --- a/test/llvm_state_mem_cache.cpp +++ b/test/llvm_state_mem_cache.cpp @@ -6,6 +6,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#include #include #include #include @@ -140,3 +141,48 @@ TEST_CASE("slp_vectorize test") model::pendulum(), {1., 0.}, kw::tol = 1e-11, kw::slp_vectorize = true, kw::force_avx512 = true}; REQUIRE(llvm_state::get_memcache_size() > new_size); } + +// Bug: in compact mode, global variables used to be created in random +// order, which would lead to logically-identical modules considered +// different by the cache machinery due to the different declaration order. +TEST_CASE("bug cache miss compact mode") +{ + { + llvm_state::clear_memcache(); + llvm_state::set_memcache_limit(2048ull * 1024u * 1024u); + + auto ta = taylor_adaptive{model::pendulum(), {1., 0.}, kw::tol = 1e-11, kw::compact_mode = true}; + const auto orig_size = llvm_state::get_memcache_size(); + + // Re-create the same ta several times and then check the cache size has not changed. + for (auto i = 0; i < 100; ++i) { + ta = taylor_adaptive{model::pendulum(), {1., 0.}, kw::tol = 1e-11, kw::compact_mode = true}; + } + + REQUIRE(llvm_state::get_memcache_size() == orig_size); + } + + { + llvm_state::clear_memcache(); + llvm_state::set_memcache_limit(2048ull * 1024u * 1024u); + + { + llvm_state s; + add_cfunc(s, "func", {model::pendulum_energy()}, kw::compact_mode = true); + s.compile(); + (void)s.jit_lookup("func"); + } + + const auto orig_size = llvm_state::get_memcache_size(); + + // Re-create the same cfunc several times and then check the cache size has not changed. + for (auto i = 0; i < 100; ++i) { + llvm_state s; + add_cfunc(s, "func", {model::pendulum_energy()}, kw::compact_mode = true); + s.compile(); + (void)s.jit_lookup("func"); + } + + REQUIRE(llvm_state::get_memcache_size() == orig_size); + } +} diff --git a/test/model_ffnn.cpp b/test/model_ffnn.cpp index 4a5bd8c25..b9c0852a7 100644 --- a/test/model_ffnn.cpp +++ b/test/model_ffnn.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -19,7 +20,6 @@ #include #include #include -#include #include "catch.hpp"