Skip to content

Commit

Permalink
llvm-kompile --profile-matching (#1095)
Browse files Browse the repository at this point in the history
We add a new flag to `llvm-kompile` which, when enabled, instruments the
`k_step` function and the optimized `step_<ordinal>` functions with
calls to a new translation unit, clock.cpp, which contains some very
basic use of `clock_gettime` in order to determine how much time is
being spent doing pattern matching in order to try to apply each rewrite
step. The resulting file contains data about the ordinal of the rule
that applied and the time it took to determine that that rule should
apply. It emits this data on stderr in a tabular format that can be read
easily by a computer. This information is useful in determining how to
optimize pattern matching in cases where it is still not clear where
time is being spent even after `kompile -O3 --iterated-threshold 1` is
passed. When the flag is not passed to the code generator, no
instrumentation is emitted, meaning this is zero-overhead unless it is
enabled.
  • Loading branch information
Dwight Guth authored Jun 20, 2024
1 parent f419535 commit ffb7b10
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 17 deletions.
9 changes: 8 additions & 1 deletion bin/llvm-kompile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ Options:
--mutable-bytes Use the faster, unsound (mutable) semantics for objects of sort
Bytes at run time, rather than the slower, sound
(immutable) that are enabled by default.
--hidden-visibility Set the visibility of all global symbols in generated code to "hidden"
--hidden-visibility Set the visibility of all global symbols in generated code to
"hidden"
--profile-matching Instrument interpeter to emit a profile of time spent in
top-level rule matching on stderr.
-O[0123] Set the optimization level for code generation.
Any option not listed above will be passed through to clang; use '--' to
Expand Down Expand Up @@ -187,6 +190,10 @@ while [[ $# -gt 0 ]]; do
kompile_clang_flags+=("--hidden-visibility")
shift
;;
--profile-matching)
codegen_flags+=("--profile-matching")
shift
;;
-O*)
codegen_flags+=("$1")
kompile_clang_flags+=("$1")
Expand Down
10 changes: 6 additions & 4 deletions include/kllvm/codegen/Decision.h
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ class decision {
value_type cat_;
llvm::PHINode *fail_subject_, *fail_pattern_, *fail_sort_;
llvm::AllocaInst *has_search_results_;
bool profile_matching_;

std::map<var_type, llvm::AllocaInst *> symbols_{};

Expand All @@ -422,7 +423,7 @@ class decision {
llvm::AllocaInst *choice_buffer, llvm::AllocaInst *choice_depth,
llvm::Module *module, value_type cat, llvm::PHINode *fail_subject,
llvm::PHINode *fail_pattern, llvm::PHINode *fail_sort,
llvm::AllocaInst *has_search_results)
llvm::AllocaInst *has_search_results, bool profile_matching = false)
: definition_(definition)
, current_block_(entry_block)
, failure_block_(failure_block)
Expand All @@ -435,7 +436,8 @@ class decision {
, fail_subject_(fail_subject)
, fail_pattern_(fail_pattern)
, fail_sort_(fail_sort)
, has_search_results_(has_search_results) { }
, has_search_results_(has_search_results)
, profile_matching_(profile_matching) { }

/* adds code to the specified basic block to take a single step based on
the specified decision tree and return the result of taking that step. */
Expand Down Expand Up @@ -464,10 +466,10 @@ void make_anywhere_function(

void make_step_function(
kore_definition *definition, llvm::Module *module, decision_node *dt,
bool search);
bool search, bool profile_matching);
void make_step_function(
kore_axiom_declaration *axiom, kore_definition *definition,
llvm::Module *module, partial_step res);
llvm::Module *module, partial_step res, bool profile_matching);
void make_match_reason_function(
kore_definition *definition, llvm::Module *module,
kore_axiom_declaration *axiom, decision_node *dt);
Expand Down
36 changes: 27 additions & 9 deletions lib/codegen/Decision.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,14 @@ void leaf_node::codegen(decision *d) {
= proof_event(d->definition_, d->module_)
.rewrite_event_pre(axiom, arity, vars, subst, d->current_block_);

if (d->profile_matching_) {
llvm::CallInst::Create(
get_or_insert_function(
d->module_, "stop_clock", llvm::Type::getVoidTy(d->ctx_),
llvm::Type::getInt64Ty(d->ctx_)),
{llvm::ConstantInt::get(llvm::Type::getInt64Ty(d->ctx_), ordinal)}, "",
d->current_block_);
}
auto *call = llvm::CallInst::Create(apply_rule, args, "", d->current_block_);
set_debug_loc(call);
call->setCallingConv(llvm::CallingConv::Tail);
Expand Down Expand Up @@ -707,7 +715,7 @@ static void init_choice_buffer(
decision_node *dt, llvm::Module *module, llvm::BasicBlock *block,
llvm::BasicBlock *stuck, llvm::BasicBlock *fail,
llvm::AllocaInst **choice_buffer_out, llvm::AllocaInst **choice_depth_out,
llvm::IndirectBrInst **jump_out) {
llvm::IndirectBrInst **jump_out, bool profile_matching) {
std::unordered_set<leaf_node *> leaves;
dt->preprocess(leaves);
auto *ty = llvm::ArrayType::get(
Expand All @@ -716,6 +724,12 @@ static void init_choice_buffer(
auto *choice_buffer = new llvm::AllocaInst(ty, 0, "choiceBuffer", block);
auto *choice_depth = new llvm::AllocaInst(
llvm::Type::getInt64Ty(module->getContext()), 0, "choiceDepth", block);
if (profile_matching) {
llvm::CallInst::Create(
get_or_insert_function(
module, "start_clock", llvm::Type::getVoidTy(module->getContext())),
{}, "", block);
}
auto *zero
= llvm::ConstantInt::get(llvm::Type::getInt64Ty(module->getContext()), 0);
new llvm::StoreInst(zero, choice_depth, block);
Expand Down Expand Up @@ -799,7 +813,8 @@ void make_eval_or_anywhere_function(
llvm::AllocaInst *choice_depth = nullptr;
llvm::IndirectBrInst *jump = nullptr;
init_choice_buffer(
dt, module, block, stuck, fail, &choice_buffer, &choice_depth, &jump);
dt, module, block, stuck, fail, &choice_buffer, &choice_depth, &jump,
false);

int i = 0;
decision codegen(
Expand Down Expand Up @@ -1113,7 +1128,7 @@ std::pair<std::vector<llvm::Value *>, llvm::BasicBlock *> step_function_header(

void make_step_function(
kore_definition *definition, llvm::Module *module, decision_node *dt,
bool search) {
bool search, bool profile_matching) {
auto *block_type = getvalue_type({sort_category::Symbol, 0}, module);
auto *debug_type
= get_debug_type({sort_category::Symbol, 0}, "SortGeneratedTopCell{}");
Expand Down Expand Up @@ -1155,7 +1170,8 @@ void make_step_function(
llvm::IndirectBrInst *jump = nullptr;

init_choice_buffer(
dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump);
dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump,
profile_matching);
insert_call_to_clear(block);

llvm::AllocaInst *has_search_results = nullptr;
Expand All @@ -1178,7 +1194,7 @@ void make_step_function(
decision codegen(
definition, result.second, fail, jump, choice_buffer, choice_depth,
module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr,
has_search_results);
has_search_results, profile_matching);
codegen.store(
std::make_pair(collected_val->getName().str(), collected_val->getType()),
collected_val);
Expand Down Expand Up @@ -1288,7 +1304,8 @@ void make_match_reason_function(
llvm::AllocaInst *choice_depth = nullptr;
llvm::IndirectBrInst *jump = nullptr;
init_choice_buffer(
dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump);
dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth, &jump,
false);

init_debug_param(
match_func, 0, "subject", {sort_category::Symbol, 0},
Expand Down Expand Up @@ -1340,7 +1357,7 @@ kore_pattern *make_partial_term(

void make_step_function(
kore_axiom_declaration *axiom, kore_definition *definition,
llvm::Module *module, partial_step res) {
llvm::Module *module, partial_step res, bool profile_matching) {
auto *block_type = getvalue_type({sort_category::Symbol, 0}, module);
std::vector<llvm::Type *> arg_types;
std::vector<llvm::Metadata *> debug_types;
Expand Down Expand Up @@ -1387,7 +1404,7 @@ void make_step_function(
llvm::IndirectBrInst *jump = nullptr;
init_choice_buffer(
res.dt, module, block, pre_stuck, fail, &choice_buffer, &choice_depth,
&jump);
&jump, profile_matching);
insert_call_to_clear(block);

llvm::BranchInst::Create(stuck, pre_stuck);
Expand All @@ -1414,7 +1431,8 @@ void make_step_function(
i = 0;
decision codegen(
definition, header.second, fail, jump, choice_buffer, choice_depth,
module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr, nullptr);
module, {sort_category::Symbol, 0}, nullptr, nullptr, nullptr, nullptr,
profile_matching);
for (auto *val : header.first) {
val->setName(res.residuals[i].occurrence.substr(0, max_name_length));
codegen.store(std::make_pair(val->getName().str(), val->getType()), val);
Expand Down
1 change: 1 addition & 0 deletions runtime/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_library(util STATIC
match_log.cpp
search.cpp
util.cpp
clock.cpp
)

install(
Expand Down
22 changes: 22 additions & 0 deletions runtime/util/clock.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include <cstdint>
#include <ctime>
#include <iostream>

static struct timespec start { };

extern "C" {
void start_clock() {
clock_gettime(CLOCK_MONOTONIC, &start);
}

void stop_clock(uint64_t ordinal) {
struct timespec stop { };
clock_gettime(CLOCK_MONOTONIC, &stop);
int64_t diff_s = stop.tv_sec - start.tv_sec;
int64_t diff_ns = stop.tv_nsec - start.tv_nsec;

uint64_t diff_ns_total = diff_s * 1000000000 + diff_ns;

std::cerr << ordinal << " " << diff_ns_total << std::endl;
}
}
6 changes: 6 additions & 0 deletions scripts/matching-profile/join-with-locations
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Converts a table of ordinals and seconds to a table of source/location and seconds. Strips directory from file paths.
while read -r ordinal time; do
loc=$(llvm-kompile-compute-loc "$2" $ordinal)
echo "${loc##*/} $time"
done < "$1"
4 changes: 4 additions & 0 deletions scripts/matching-profile/matching-profile-formatter
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Sorts table of locations and seconds in reverse order and displays it with individual and cumulative percentages.
sum=$(cat "$1" | awk '{sum += $2} END { print sum }')
tac "$1" | awk '{perc_local=$2 / '"$sum"' * 100; perc += perc_local; print $1 " " $2 " " perc_local " " perc}'
3 changes: 3 additions & 0 deletions scripts/matching-profile/sum-matching-profile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Aggregates data from --profile-matching and sums together identical ordinals, yielding table with ordinals and seconds.
cat "$1" | awk '{sum[$1] += $2} END { for (i in sum) print i, sum[i]}' | sort -n -k2 | awk '{ print $1, $2 / 1000000000}'
13 changes: 10 additions & 3 deletions tools/llvm-kompile-codegen/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ cl::opt<bool> safe_partial(
"exception."),
cl::init(false), cl::cat(codegen_tool_cat));

cl::opt<bool> profile_matching(
"profile-matching",
cl::desc("Instrument k_step functions with code to profile time spent "
"matching when applying each rule."),
cl::init(false), cl::cat(codegen_tool_cat));

namespace {

fs::path dt_dir() {
Expand Down Expand Up @@ -174,7 +180,8 @@ int main(int argc, char **argv) {
definition->get_hooked_sorts());
make_apply_rule_function(
axiom, definition.get(), mod.get(), residuals.residuals);
make_step_function(axiom, definition.get(), mod.get(), residuals);
make_step_function(
axiom, definition.get(), mod.get(), residuals, profile_matching);
} else {
make_apply_rule_function(axiom, definition.get(), mod.get(), true);
}
Expand All @@ -195,11 +202,11 @@ int main(int argc, char **argv) {
auto *dt = parse_yamldecision_tree(
mod.get(), decision_tree, definition->get_all_symbols(),
definition->get_hooked_sorts());
make_step_function(definition.get(), mod.get(), dt, false);
make_step_function(definition.get(), mod.get(), dt, false, profile_matching);
auto *dt_search = parse_yamldecision_tree(
mod.get(), dt_dir() / "dt-search.yaml", definition->get_all_symbols(),
definition->get_hooked_sorts());
make_step_function(definition.get(), mod.get(), dt_search, true);
make_step_function(definition.get(), mod.get(), dt_search, true, false);

auto index = read_index_file();
for (auto const &entry : definition->get_symbols()) {
Expand Down

0 comments on commit ffb7b10

Please sign in to comment.