From ffb7b1080e58311b65b84036854bb37ade633505 Mon Sep 17 00:00:00 2001 From: Dwight Guth Date: Thu, 20 Jun 2024 14:01:40 -0500 Subject: [PATCH] llvm-kompile --profile-matching (#1095) We add a new flag to `llvm-kompile` which, when enabled, instruments the `k_step` function and the optimized `step_` functions with calls to a new translation unit, clock.cpp, which contains some very basic use of `clock_gettime` in order to determine how much time is being spent doing pattern matching in order to try to apply each rewrite step. The resulting file contains data about the ordinal of the rule that applied and the time it took to determine that that rule should apply. It emits this data on stderr in a tabular format that can be read easily by a computer. This information is useful in determining how to optimize pattern matching in cases where it is still not clear where time is being spent even after `kompile -O3 --iterated-threshold 1` is passed. When the flag is not passed to the code generator, no instrumentation is emitted, meaning this is zero-overhead unless it is enabled. --- bin/llvm-kompile | 9 ++++- include/kllvm/codegen/Decision.h | 10 +++--- lib/codegen/Decision.cpp | 36 ++++++++++++++----- runtime/util/CMakeLists.txt | 1 + runtime/util/clock.cpp | 22 ++++++++++++ scripts/matching-profile/join-with-locations | 6 ++++ .../matching-profile-formatter | 4 +++ scripts/matching-profile/sum-matching-profile | 3 ++ tools/llvm-kompile-codegen/main.cpp | 13 +++++-- 9 files changed, 87 insertions(+), 17 deletions(-) create mode 100644 runtime/util/clock.cpp create mode 100755 scripts/matching-profile/join-with-locations create mode 100755 scripts/matching-profile/matching-profile-formatter create mode 100755 scripts/matching-profile/sum-matching-profile diff --git a/bin/llvm-kompile b/bin/llvm-kompile index d42d7e63d..e6fb1ba2d 100755 --- a/bin/llvm-kompile +++ b/bin/llvm-kompile @@ -40,7 +40,10 @@ Options: --mutable-bytes Use the faster, unsound (mutable) semantics for objects of sort Bytes at run time, rather than the slower, sound (immutable) that are enabled by default. - --hidden-visibility Set the visibility of all global symbols in generated code to "hidden" + --hidden-visibility Set the visibility of all global symbols in generated code to + "hidden" + --profile-matching Instrument interpeter to emit a profile of time spent in + top-level rule matching on stderr. -O[0123] Set the optimization level for code generation. Any option not listed above will be passed through to clang; use '--' to @@ -187,6 +190,10 @@ while [[ $# -gt 0 ]]; do kompile_clang_flags+=("--hidden-visibility") shift ;; + --profile-matching) + codegen_flags+=("--profile-matching") + shift + ;; -O*) codegen_flags+=("$1") kompile_clang_flags+=("$1") diff --git a/include/kllvm/codegen/Decision.h b/include/kllvm/codegen/Decision.h index 57adb1193..03c15219b 100644 --- a/include/kllvm/codegen/Decision.h +++ b/include/kllvm/codegen/Decision.h @@ -405,6 +405,7 @@ class decision { value_type cat_; llvm::PHINode *fail_subject_, *fail_pattern_, *fail_sort_; llvm::AllocaInst *has_search_results_; + bool profile_matching_; std::map symbols_{}; @@ -422,7 +423,7 @@ class decision { llvm::AllocaInst *choice_buffer, llvm::AllocaInst *choice_depth, llvm::Module *module, value_type cat, llvm::PHINode *fail_subject, llvm::PHINode *fail_pattern, llvm::PHINode *fail_sort, - llvm::AllocaInst *has_search_results) + llvm::AllocaInst *has_search_results, bool profile_matching = false) : definition_(definition) , current_block_(entry_block) , failure_block_(failure_block) @@ -435,7 +436,8 @@ class decision { , fail_subject_(fail_subject) , fail_pattern_(fail_pattern) , fail_sort_(fail_sort) - , has_search_results_(has_search_results) { } + , has_search_results_(has_search_results) + , profile_matching_(profile_matching) { } /* adds code to the specified basic block to take a single step based on the specified decision tree and return the result of taking that step. */ @@ -464,10 +466,10 @@ void make_anywhere_function( void make_step_function( kore_definition *definition, llvm::Module *module, decision_node *dt, - bool search); + bool search, bool profile_matching); void make_step_function( kore_axiom_declaration *axiom, kore_definition *definition, - llvm::Module *module, partial_step res); + llvm::Module *module, partial_step res, bool profile_matching); void make_match_reason_function( kore_definition *definition, llvm::Module *module, kore_axiom_declaration *axiom, decision_node *dt); diff --git a/lib/codegen/Decision.cpp b/lib/codegen/Decision.cpp index 09e7d9f95..ba0852ee8 100644 --- a/lib/codegen/Decision.cpp +++ b/lib/codegen/Decision.cpp @@ -612,6 +612,14 @@ void leaf_node::codegen(decision *d) { = proof_event(d->definition_, d->module_) .rewrite_event_pre(axiom, arity, vars, subst, d->current_block_); + if (d->profile_matching_) { + llvm::CallInst::Create( + get_or_insert_function( + d->module_, "stop_clock", llvm::Type::getVoidTy(d->ctx_), + llvm::Type::getInt64Ty(d->ctx_)), + {llvm::ConstantInt::get(llvm::Type::getInt64Ty(d->ctx_), ordinal)}, "", + d->current_block_); + } auto *call = llvm::CallInst::Create(apply_rule, args, "", d->current_block_); set_debug_loc(call); call->setCallingConv(llvm::CallingConv::Tail); @@ -707,7 +715,7 @@ static void init_choice_buffer( decision_node *dt, llvm::Module *module, llvm::BasicBlock *block, llvm::BasicBlock *stuck, llvm::BasicBlock *fail, llvm::AllocaInst **choice_buffer_out, llvm::AllocaInst **choice_depth_out, - llvm::IndirectBrInst **jump_out) { + llvm::IndirectBrInst **jump_out, bool profile_matching) { std::unordered_set leaves; dt->preprocess(leaves); auto *ty = llvm::ArrayType::get( @@ -716,6 +724,12 @@ static void init_choice_buffer( auto *choice_buffer = new llvm::AllocaInst(ty, 0, "choiceBuffer", block); auto *choice_depth = new llvm::AllocaInst( llvm::Type::getInt64Ty(module->getContext()), 0, "choiceDepth", block); + if (profile_matching) { + llvm::CallInst::Create( + get_or_insert_function( + module, "start_clock", llvm::Type::getVoidTy(module->getContext())), + {}, "", block); + } auto *zero = llvm::ConstantInt::get(llvm::Type::getInt64Ty(module->getContext()), 0); new llvm::StoreInst(zero, choice_depth, block); @@ -799,7 +813,8 @@ void make_eval_or_anywhere_function( llvm::AllocaInst *choice_depth = nullptr; llvm::IndirectBrInst *jump = nullptr; init_choice_buffer( - dt, module, block, stuck, fail, &choice_buffer, &choice_depth, &jump); + dt, module, block, stuck, fail, &choice_buffer, &choice_depth, &jump, + false); int i = 0; decision codegen( @@ -1113,7 +1128,7 @@ std::pair, llvm::BasicBlock *> step_function_header( void make_step_function( kore_definition *definition, llvm::Module *module, decision_node *dt, - bool search) { + bool search, bool profile_matching) { auto *block_type = getvalue_type({sort_category::Symbol, 0}, module); auto *debug_type = get_debug_type({sort_category::Symbol, 0}, "SortGeneratedTopCell{}"); @@ -1155,7 +1170,8 @@ void make_step_function( llvm::IndirectBrInst *jump = nullptr; init_choice_buffer( - dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump); + dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump, + profile_matching); insert_call_to_clear(block); llvm::AllocaInst *has_search_results = nullptr; @@ -1178,7 +1194,7 @@ void make_step_function( decision codegen( definition, result.second, fail, jump, choice_buffer, choice_depth, module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr, - has_search_results); + has_search_results, profile_matching); codegen.store( std::make_pair(collected_val->getName().str(), collected_val->getType()), collected_val); @@ -1288,7 +1304,8 @@ void make_match_reason_function( llvm::AllocaInst *choice_depth = nullptr; llvm::IndirectBrInst *jump = nullptr; init_choice_buffer( - dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump); + dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump, + false); init_debug_param( match_func, 0, "subject", {sort_category::Symbol, 0}, @@ -1340,7 +1357,7 @@ kore_pattern *make_partial_term( void make_step_function( kore_axiom_declaration *axiom, kore_definition *definition, - llvm::Module *module, partial_step res) { + llvm::Module *module, partial_step res, bool profile_matching) { auto *block_type = getvalue_type({sort_category::Symbol, 0}, module); std::vector arg_types; std::vector debug_types; @@ -1387,7 +1404,7 @@ void make_step_function( llvm::IndirectBrInst *jump = nullptr; init_choice_buffer( res.dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, - &jump); + &jump, profile_matching); insert_call_to_clear(block); llvm::BranchInst::Create(stuck, pre_stuck); @@ -1414,7 +1431,8 @@ void make_step_function( i = 0; decision codegen( definition, header.second, fail, jump, choice_buffer, choice_depth, - module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr, nullptr); + module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr, nullptr, + profile_matching); for (auto *val : header.first) { val->setName(res.residuals[i].occurrence.substr(0, max_name_length)); codegen.store(std::make_pair(val->getName().str(), val->getType()), val); diff --git a/runtime/util/CMakeLists.txt b/runtime/util/CMakeLists.txt index 012b9c7d9..4f5167922 100644 --- a/runtime/util/CMakeLists.txt +++ b/runtime/util/CMakeLists.txt @@ -6,6 +6,7 @@ add_library(util STATIC match_log.cpp search.cpp util.cpp + clock.cpp ) install( diff --git a/runtime/util/clock.cpp b/runtime/util/clock.cpp new file mode 100644 index 000000000..652e79ba3 --- /dev/null +++ b/runtime/util/clock.cpp @@ -0,0 +1,22 @@ +#include +#include +#include + +static struct timespec start { }; + +extern "C" { +void start_clock() { + clock_gettime(CLOCK_MONOTONIC, &start); +} + +void stop_clock(uint64_t ordinal) { + struct timespec stop { }; + clock_gettime(CLOCK_MONOTONIC, &stop); + int64_t diff_s = stop.tv_sec - start.tv_sec; + int64_t diff_ns = stop.tv_nsec - start.tv_nsec; + + uint64_t diff_ns_total = diff_s * 1000000000 + diff_ns; + + std::cerr << ordinal << " " << diff_ns_total << std::endl; +} +} diff --git a/scripts/matching-profile/join-with-locations b/scripts/matching-profile/join-with-locations new file mode 100755 index 000000000..029d64ae9 --- /dev/null +++ b/scripts/matching-profile/join-with-locations @@ -0,0 +1,6 @@ +#!/bin/bash +# Converts a table of ordinals and seconds to a table of source/location and seconds. Strips directory from file paths. +while read -r ordinal time; do + loc=$(llvm-kompile-compute-loc "$2" $ordinal) + echo "${loc##*/} $time" +done < "$1" diff --git a/scripts/matching-profile/matching-profile-formatter b/scripts/matching-profile/matching-profile-formatter new file mode 100755 index 000000000..a6009cc75 --- /dev/null +++ b/scripts/matching-profile/matching-profile-formatter @@ -0,0 +1,4 @@ +#!/bin/bash +# Sorts table of locations and seconds in reverse order and displays it with individual and cumulative percentages. +sum=$(cat "$1" | awk '{sum += $2} END { print sum }') +tac "$1" | awk '{perc_local=$2 / '"$sum"' * 100; perc += perc_local; print $1 " " $2 " " perc_local " " perc}' diff --git a/scripts/matching-profile/sum-matching-profile b/scripts/matching-profile/sum-matching-profile new file mode 100755 index 000000000..22fc5dc1e --- /dev/null +++ b/scripts/matching-profile/sum-matching-profile @@ -0,0 +1,3 @@ +#!/bin/bash +# Aggregates data from --profile-matching and sums together identical ordinals, yielding table with ordinals and seconds. +cat "$1" | awk '{sum[$1] += $2} END { for (i in sum) print i, sum[i]}' | sort -n -k2 | awk '{ print $1, $2 / 1000000000}' diff --git a/tools/llvm-kompile-codegen/main.cpp b/tools/llvm-kompile-codegen/main.cpp index bfe01ad22..99fabf3d4 100644 --- a/tools/llvm-kompile-codegen/main.cpp +++ b/tools/llvm-kompile-codegen/main.cpp @@ -78,6 +78,12 @@ cl::opt safe_partial( "exception."), cl::init(false), cl::cat(codegen_tool_cat)); +cl::opt profile_matching( + "profile-matching", + cl::desc("Instrument k_step functions with code to profile time spent " + "matching when applying each rule."), + cl::init(false), cl::cat(codegen_tool_cat)); + namespace { fs::path dt_dir() { @@ -174,7 +180,8 @@ int main(int argc, char **argv) { definition->get_hooked_sorts()); make_apply_rule_function( axiom, definition.get(), mod.get(), residuals.residuals); - make_step_function(axiom, definition.get(), mod.get(), residuals); + make_step_function( + axiom, definition.get(), mod.get(), residuals, profile_matching); } else { make_apply_rule_function(axiom, definition.get(), mod.get(), true); } @@ -195,11 +202,11 @@ int main(int argc, char **argv) { auto *dt = parse_yamldecision_tree( mod.get(), decision_tree, definition->get_all_symbols(), definition->get_hooked_sorts()); - make_step_function(definition.get(), mod.get(), dt, false); + make_step_function(definition.get(), mod.get(), dt, false, profile_matching); auto *dt_search = parse_yamldecision_tree( mod.get(), dt_dir() / "dt-search.yaml", definition->get_all_symbols(), definition->get_hooked_sorts()); - make_step_function(definition.get(), mod.get(), dt_search, true); + make_step_function(definition.get(), mod.get(), dt_search, true, false); auto index = read_index_file(); for (auto const &entry : definition->get_symbols()) {