diff --git a/bin/llvm-kompile b/bin/llvm-kompile index d42d7e63d..e6fb1ba2d 100755 --- a/bin/llvm-kompile +++ b/bin/llvm-kompile @@ -40,7 +40,10 @@ Options: --mutable-bytes Use the faster, unsound (mutable) semantics for objects of sort Bytes at run time, rather than the slower, sound (immutable) that are enabled by default. - --hidden-visibility Set the visibility of all global symbols in generated code to "hidden" + --hidden-visibility Set the visibility of all global symbols in generated code to + "hidden" + --profile-matching Instrument interpeter to emit a profile of time spent in + top-level rule matching on stderr. -O[0123] Set the optimization level for code generation. Any option not listed above will be passed through to clang; use '--' to @@ -187,6 +190,10 @@ while [[ $# -gt 0 ]]; do kompile_clang_flags+=("--hidden-visibility") shift ;; + --profile-matching) + codegen_flags+=("--profile-matching") + shift + ;; -O*) codegen_flags+=("$1") kompile_clang_flags+=("$1") diff --git a/include/kllvm/codegen/Decision.h b/include/kllvm/codegen/Decision.h index 57adb1193..03c15219b 100644 --- a/include/kllvm/codegen/Decision.h +++ b/include/kllvm/codegen/Decision.h @@ -405,6 +405,7 @@ class decision { value_type cat_; llvm::PHINode *fail_subject_, *fail_pattern_, *fail_sort_; llvm::AllocaInst *has_search_results_; + bool profile_matching_; std::map symbols_{}; @@ -422,7 +423,7 @@ class decision { llvm::AllocaInst *choice_buffer, llvm::AllocaInst *choice_depth, llvm::Module *module, value_type cat, llvm::PHINode *fail_subject, llvm::PHINode *fail_pattern, llvm::PHINode *fail_sort, - llvm::AllocaInst *has_search_results) + llvm::AllocaInst *has_search_results, bool profile_matching = false) : definition_(definition) , current_block_(entry_block) , failure_block_(failure_block) @@ -435,7 +436,8 @@ class decision { , fail_subject_(fail_subject) , fail_pattern_(fail_pattern) , fail_sort_(fail_sort) - , has_search_results_(has_search_results) { } + , has_search_results_(has_search_results) + , profile_matching_(profile_matching) { } /* adds code to the specified basic block to take a single step based on the specified decision tree and return the result of taking that step. */ @@ -464,10 +466,10 @@ void make_anywhere_function( void make_step_function( kore_definition *definition, llvm::Module *module, decision_node *dt, - bool search); + bool search, bool profile_matching); void make_step_function( kore_axiom_declaration *axiom, kore_definition *definition, - llvm::Module *module, partial_step res); + llvm::Module *module, partial_step res, bool profile_matching); void make_match_reason_function( kore_definition *definition, llvm::Module *module, kore_axiom_declaration *axiom, decision_node *dt); diff --git a/lib/codegen/Decision.cpp b/lib/codegen/Decision.cpp index 09e7d9f95..ba0852ee8 100644 --- a/lib/codegen/Decision.cpp +++ b/lib/codegen/Decision.cpp @@ -612,6 +612,14 @@ void leaf_node::codegen(decision *d) { = proof_event(d->definition_, d->module_) .rewrite_event_pre(axiom, arity, vars, subst, d->current_block_); + if (d->profile_matching_) { + llvm::CallInst::Create( + get_or_insert_function( + d->module_, "stop_clock", llvm::Type::getVoidTy(d->ctx_), + llvm::Type::getInt64Ty(d->ctx_)), + {llvm::ConstantInt::get(llvm::Type::getInt64Ty(d->ctx_), ordinal)}, "", + d->current_block_); + } auto *call = llvm::CallInst::Create(apply_rule, args, "", d->current_block_); set_debug_loc(call); call->setCallingConv(llvm::CallingConv::Tail); @@ -707,7 +715,7 @@ static void init_choice_buffer( decision_node *dt, llvm::Module *module, llvm::BasicBlock *block, llvm::BasicBlock *stuck, llvm::BasicBlock *fail, llvm::AllocaInst **choice_buffer_out, llvm::AllocaInst **choice_depth_out, - llvm::IndirectBrInst **jump_out) { + llvm::IndirectBrInst **jump_out, bool profile_matching) { std::unordered_set leaves; dt->preprocess(leaves); auto *ty = llvm::ArrayType::get( @@ -716,6 +724,12 @@ static void init_choice_buffer( auto *choice_buffer = new llvm::AllocaInst(ty, 0, "choiceBuffer", block); auto *choice_depth = new llvm::AllocaInst( llvm::Type::getInt64Ty(module->getContext()), 0, "choiceDepth", block); + if (profile_matching) { + llvm::CallInst::Create( + get_or_insert_function( + module, "start_clock", llvm::Type::getVoidTy(module->getContext())), + {}, "", block); + } auto *zero = llvm::ConstantInt::get(llvm::Type::getInt64Ty(module->getContext()), 0); new llvm::StoreInst(zero, choice_depth, block); @@ -799,7 +813,8 @@ void make_eval_or_anywhere_function( llvm::AllocaInst *choice_depth = nullptr; llvm::IndirectBrInst *jump = nullptr; init_choice_buffer( - dt, module, block, stuck, fail, &choice_buffer, &choice_depth, &jump); + dt, module, block, stuck, fail, &choice_buffer, &choice_depth, &jump, + false); int i = 0; decision codegen( @@ -1113,7 +1128,7 @@ std::pair, llvm::BasicBlock *> step_function_header( void make_step_function( kore_definition *definition, llvm::Module *module, decision_node *dt, - bool search) { + bool search, bool profile_matching) { auto *block_type = getvalue_type({sort_category::Symbol, 0}, module); auto *debug_type = get_debug_type({sort_category::Symbol, 0}, "SortGeneratedTopCell{}"); @@ -1155,7 +1170,8 @@ void make_step_function( llvm::IndirectBrInst *jump = nullptr; init_choice_buffer( - dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump); + dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump, + profile_matching); insert_call_to_clear(block); llvm::AllocaInst *has_search_results = nullptr; @@ -1178,7 +1194,7 @@ void make_step_function( decision codegen( definition, result.second, fail, jump, choice_buffer, choice_depth, module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr, - has_search_results); + has_search_results, profile_matching); codegen.store( std::make_pair(collected_val->getName().str(), collected_val->getType()), collected_val); @@ -1288,7 +1304,8 @@ void make_match_reason_function( llvm::AllocaInst *choice_depth = nullptr; llvm::IndirectBrInst *jump = nullptr; init_choice_buffer( - dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump); + dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump, + false); init_debug_param( match_func, 0, "subject", {sort_category::Symbol, 0}, @@ -1340,7 +1357,7 @@ kore_pattern *make_partial_term( void make_step_function( kore_axiom_declaration *axiom, kore_definition *definition, - llvm::Module *module, partial_step res) { + llvm::Module *module, partial_step res, bool profile_matching) { auto *block_type = getvalue_type({sort_category::Symbol, 0}, module); std::vector arg_types; std::vector debug_types; @@ -1387,7 +1404,7 @@ void make_step_function( llvm::IndirectBrInst *jump = nullptr; init_choice_buffer( res.dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, - &jump); + &jump, profile_matching); insert_call_to_clear(block); llvm::BranchInst::Create(stuck, pre_stuck); @@ -1414,7 +1431,8 @@ void make_step_function( i = 0; decision codegen( definition, header.second, fail, jump, choice_buffer, choice_depth, - module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr, nullptr); + module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr, nullptr, + profile_matching); for (auto *val : header.first) { val->setName(res.residuals[i].occurrence.substr(0, max_name_length)); codegen.store(std::make_pair(val->getName().str(), val->getType()), val); diff --git a/runtime/util/CMakeLists.txt b/runtime/util/CMakeLists.txt index 012b9c7d9..4f5167922 100644 --- a/runtime/util/CMakeLists.txt +++ b/runtime/util/CMakeLists.txt @@ -6,6 +6,7 @@ add_library(util STATIC match_log.cpp search.cpp util.cpp + clock.cpp ) install( diff --git a/runtime/util/clock.cpp b/runtime/util/clock.cpp new file mode 100644 index 000000000..652e79ba3 --- /dev/null +++ b/runtime/util/clock.cpp @@ -0,0 +1,22 @@ +#include +#include +#include + +static struct timespec start { }; + +extern "C" { +void start_clock() { + clock_gettime(CLOCK_MONOTONIC, &start); +} + +void stop_clock(uint64_t ordinal) { + struct timespec stop { }; + clock_gettime(CLOCK_MONOTONIC, &stop); + int64_t diff_s = stop.tv_sec - start.tv_sec; + int64_t diff_ns = stop.tv_nsec - start.tv_nsec; + + uint64_t diff_ns_total = diff_s * 1000000000 + diff_ns; + + std::cerr << ordinal << " " << diff_ns_total << std::endl; +} +} diff --git a/scripts/matching-profile/join-with-locations b/scripts/matching-profile/join-with-locations new file mode 100755 index 000000000..029d64ae9 --- /dev/null +++ b/scripts/matching-profile/join-with-locations @@ -0,0 +1,6 @@ +#!/bin/bash +# Converts a table of ordinals and seconds to a table of source/location and seconds. Strips directory from file paths. +while read -r ordinal time; do + loc=$(llvm-kompile-compute-loc "$2" $ordinal) + echo "${loc##*/} $time" +done < "$1" diff --git a/scripts/matching-profile/matching-profile-formatter b/scripts/matching-profile/matching-profile-formatter new file mode 100755 index 000000000..a6009cc75 --- /dev/null +++ b/scripts/matching-profile/matching-profile-formatter @@ -0,0 +1,4 @@ +#!/bin/bash +# Sorts table of locations and seconds in reverse order and displays it with individual and cumulative percentages. +sum=$(cat "$1" | awk '{sum += $2} END { print sum }') +tac "$1" | awk '{perc_local=$2 / '"$sum"' * 100; perc += perc_local; print $1 " " $2 " " perc_local " " perc}' diff --git a/scripts/matching-profile/sum-matching-profile b/scripts/matching-profile/sum-matching-profile new file mode 100755 index 000000000..22fc5dc1e --- /dev/null +++ b/scripts/matching-profile/sum-matching-profile @@ -0,0 +1,3 @@ +#!/bin/bash +# Aggregates data from --profile-matching and sums together identical ordinals, yielding table with ordinals and seconds. +cat "$1" | awk '{sum[$1] += $2} END { for (i in sum) print i, sum[i]}' | sort -n -k2 | awk '{ print $1, $2 / 1000000000}' diff --git a/tools/llvm-kompile-codegen/main.cpp b/tools/llvm-kompile-codegen/main.cpp index bfe01ad22..99fabf3d4 100644 --- a/tools/llvm-kompile-codegen/main.cpp +++ b/tools/llvm-kompile-codegen/main.cpp @@ -78,6 +78,12 @@ cl::opt safe_partial( "exception."), cl::init(false), cl::cat(codegen_tool_cat)); +cl::opt profile_matching( + "profile-matching", + cl::desc("Instrument k_step functions with code to profile time spent " + "matching when applying each rule."), + cl::init(false), cl::cat(codegen_tool_cat)); + namespace { fs::path dt_dir() { @@ -174,7 +180,8 @@ int main(int argc, char **argv) { definition->get_hooked_sorts()); make_apply_rule_function( axiom, definition.get(), mod.get(), residuals.residuals); - make_step_function(axiom, definition.get(), mod.get(), residuals); + make_step_function( + axiom, definition.get(), mod.get(), residuals, profile_matching); } else { make_apply_rule_function(axiom, definition.get(), mod.get(), true); } @@ -195,11 +202,11 @@ int main(int argc, char **argv) { auto *dt = parse_yamldecision_tree( mod.get(), decision_tree, definition->get_all_symbols(), definition->get_hooked_sorts()); - make_step_function(definition.get(), mod.get(), dt, false); + make_step_function(definition.get(), mod.get(), dt, false, profile_matching); auto *dt_search = parse_yamldecision_tree( mod.get(), dt_dir() / "dt-search.yaml", definition->get_all_symbols(), definition->get_hooked_sorts()); - make_step_function(definition.get(), mod.get(), dt_search, true); + make_step_function(definition.get(), mod.get(), dt_search, true, false); auto index = read_index_file(); for (auto const &entry : definition->get_symbols()) {