diff --git a/src/liboslexec/backendllvm.h b/src/liboslexec/backendllvm.h index ca6ead606..f4093425e 100644 --- a/src/liboslexec/backendllvm.h +++ b/src/liboslexec/backendllvm.h @@ -61,6 +61,7 @@ class BackendLLVM final : public OSOProcessorBase { // Create llvm functions for OptiX callables std::vector build_llvm_optix_callables(); llvm::Function* build_llvm_fused_callable(); + llvm::Function* build_check_layer_skip_stub(); /// Build up LLVM IR code for the given range [begin,end) or /// opcodes, putting them (initially) into basic block bb (or the diff --git a/src/liboslexec/llvm_gen.cpp b/src/liboslexec/llvm_gen.cpp index 807d7389b..219a5b5c8 100644 --- a/src/liboslexec/llvm_gen.cpp +++ b/src/liboslexec/llvm_gen.cpp @@ -118,6 +118,40 @@ BackendLLVM::llvm_call_layer(int layer, bool unconditional) // if it's run unconditionally. // The code in the parent layer itself will set its 'executed' flag. + // + // WIP COMMENT: + // Do something a little more complicated in the conditional case. + // + // We set up a stub function osl_check_layer_skip_stub(parentlayer) { return false; } + // + // In addition to checking groupdata->run[parentlayer], + // we also call the stub "bool skip = osl_check_layer_skip_stub(parentlayer)". + // The conditional becomes if (!skip && !groupdata->run[parentlayer]). + // + // During optimization, we analyze each call to that stub function. + // For a given call, we can walk up llvm's dominator tree and search + // for prior calls to the stub function with the same layer. Finding one guarantees + // that the layer has already run by the time the current call executes. + // + // So if we find a hit, we replace the call with the constant true: + // bool skip = osl_check_layer_skip_stub(parentlayer) -> bool skip = true; + // Then (!skip && !groupdata->run[parentlayer]) is known false and llvm can + // constant-fold the entire if-statement away. + // + // If we don't find a hit, llvm can still inline the stub and we codegen the + // original if-statement. 
+    //
+    // Similarly, if we skip the optimization, we still generate the correct code,
+    // we just won't remove any unnecessary checks.
+    //
+
+    llvm::Value* skip = ll.constant_bool(false);
+    if (!unconditional) {
+        llvm::Value* args[]
+            = { ll.constant(layer), sg_ptr() };
+        skip = ll.call_function("osl_check_layer_skip_stub", args);
+    }
+
     llvm::Value* args[]
         = { sg_ptr(), groupdata_ptr(), userdata_base_ptr(),
             output_base_ptr(), shadeindex(), m_llvm_interactive_params_ptr };
@@ -129,6 +163,7 @@ BackendLLVM::llvm_call_layer(int layer, bool unconditional)
     if (!unconditional) {
         llvm::Value* executed = ll.op_load(layerfield);
         executed = ll.op_ne(executed, trueval);
+        executed = ll.op_and(ll.op_not(skip), executed);
         then_block = ll.new_basic_block("");
         after_block = ll.new_basic_block("");
         ll.op_branch(executed, then_block, after_block);
diff --git a/src/liboslexec/llvm_instance.cpp b/src/liboslexec/llvm_instance.cpp
index e2a3ed471..3f5a5a502 100644
--- a/src/liboslexec/llvm_instance.cpp
+++ b/src/liboslexec/llvm_instance.cpp
@@ -990,6 +990,42 @@ BackendLLVM::build_llvm_init()
     return ll.current_function();
 }
 
+/// Emit the LLVM function "osl_check_layer_skip_stub": it takes an int and
+/// a value of type llvm_type_sg_ptr(), and its entire body is a single
+/// basic block that returns the bool constant false.
+/// Returns the newly created function.
+llvm::Function*
+BackendLLVM::build_check_layer_skip_stub()
+{
+    const char* const stub_name = "osl_check_layer_skip_stub";
+
+    llvm::Function* stub_func
+        = ll.make_function(stub_name, false, ll.type_bool(),
+                           { ll.type_int(), llvm_type_sg_ptr() }, false);
+    ll.current_function(stub_func);
+
+    if (ll.debug_is_enabled()) {
+        // The source file of the op at group()[0]'s maincodebegin() is what
+        // gets passed on to debug_push_function().
+        ustring srcfile
+            = group()[0]->op(group()[0]->maincodebegin()).sourcefile();
+        ll.debug_push_function(stub_name, srcfile, 1);
+    }
+
+    // The body: one basic block that holds "return false".
+    llvm::BasicBlock* body_bb
+        = ll.new_basic_block("check_layer_skip_stub-bb");
+    ll.new_builder(body_bb);
+    ll.op_return(ll.constant_bool(false));
+
+    if (ll.debug_is_enabled()) {
+        ll.debug_pop_function();
+    }
+
+    ll.end_builder();
+    return stub_func;
+}
+
 // OptiX Callables:
 // Builds three OptiX callables: an init wrapper, an entry layer wrapper,
 // and a "fused" callable
that wraps both and owns the groupdata params buffer.
@@ -1575,6 +1611,7 @@ BackendLLVM::run()
 #ifdef OSL_LLVM_NO_BITCODE
     OSL_ASSERT(!use_rs_bitcode());
     ll.module(ll.new_module("llvm_ops"));
+
 # if OSL_USE_OPTIX
     if (use_optix()) {
         // If the module is created from LLVM bitcode, the target and
@@ -1689,6 +1726,7 @@ BackendLLVM::run()
     shadingsys().m_stat_empty_instances += nlayers - m_num_used_layers;
 
     initialize_llvm_group();
+    build_check_layer_skip_stub();
 
     // Generate the LLVM IR for each layer. Skip unused layers.
     m_llvm_local_mem = 0;
diff --git a/src/liboslexec/llvm_util.cpp b/src/liboslexec/llvm_util.cpp
index 3b443f751..1fb8afcf2 100644
--- a/src/liboslexec/llvm_util.cpp
+++ b/src/liboslexec/llvm_util.cpp
@@ -79,6 +79,7 @@
 #include
 #include
 #include
+#include
 #include
 
@@ -92,6 +93,7 @@
 #include
 #include
 
+
 OSL_NAMESPACE_ENTER
 
@@ -1751,7 +1753,98 @@ LLVM_Util::InstallLazyFunctionCreator(void* (*P)(const std::string&))
     exec->InstallLazyFunctionCreator(P);
 }
 
+namespace {
+
+/// Legacy function pass that folds away redundant calls to
+/// osl_check_layer_skip_stub(layer, ...): a call is replaced by the constant
+/// "true" when a basic block strictly dominating it already holds a stub
+/// call for the same layer.  All other calls are left untouched.
+struct CheckLayerRemovalPass : public llvm::FunctionPass {
+    int m_calls_checked = 0;  // stub calls recorded since doInitialization
+    int m_calls_removed = 0;  // stub calls replaced by a constant
+
+    static char ID;
+    CheckLayerRemovalPass() : FunctionPass(ID) {}
+
+    /// Reset the counters.  Returns false because the module is unchanged.
+    bool doInitialization(llvm::Module& /*M*/) override {
+        m_calls_checked = 0;
+        m_calls_removed = 0;
+        return false;
+    }
+
+    /// If `inst` is a direct call to the stub whose first argument is an
+    /// integer constant, return the call and store that constant in `layer`;
+    /// otherwise return nullptr.
+    static llvm::CallInst* as_stub_call(llvm::Instruction& inst, int& layer) {
+        auto* call = llvm::dyn_cast<llvm::CallInst>(&inst);
+        llvm::Function* callee = call ? call->getCalledFunction() : nullptr;
+        if (!callee || callee->getName() != "osl_check_layer_skip_stub")
+            return nullptr;
+        // dyn_cast, not cast: skip a non-constant layer instead of asserting
+        auto* arg = llvm::dyn_cast<llvm::ConstantInt>(call->getArgOperand(0));
+        if (!arg)
+            return nullptr;
+        layer = static_cast<int>(arg->getSExtValue());
+        return call;
+    }
+
+    /// Returns true if at least one stub call in F was replaced.
+    bool runOnFunction(llvm::Function& F) override {
+        llvm::DominatorTree& dt
+            = getAnalysis<llvm::DominatorTreeWrapperPass>().getDomTree();
+        // Record, per basic block, the layers that have a stub call in it.
+        llvm::ValueMap<llvm::BasicBlock*, std::unordered_set<int>> seen;
+        for (auto I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
+            int layer = 0;
+            if (as_stub_call(*I, layer)) {
+                seen[I->getParent()].insert(layer);
+                ++m_calls_checked;
+            }
+        }
+        if (seen.empty())
+            return false;
+
+        // Queue every call that has a same-layer call in a strict dominator.
+        std::unordered_set<llvm::CallInst*> redundant;
+        for (auto I = llvm::inst_begin(F), E = llvm::inst_end(F); I != E; ++I) {
+            int layer = 0;
+            llvm::CallInst* call = as_stub_call(*I, layer);
+            // getNode() is null for blocks unreachable from the entry block.
+            auto node = call ? dt.getNode(call->getParent()) : nullptr;
+            for (node = node ? node->getIDom() : nullptr; node;
+                 node = node->getIDom()) {
+                // find(), not operator[], so the query never grows the map.
+                auto hit = seen.find(node->getBlock());
+                if (hit != seen.end() && hit->second.count(layer) > 0) {
+                    redundant.insert(call);
+                    break;
+                }
+            }
+        }
+
+        // Replace each queued call by "true" of the call's own type (so the
+        // width always matches the stub's return type), then erase the call.
+        // WIP (from the original author): this step, not the dominator walk,
+        // was suspected of causing a performance regression -- unconfirmed.
+        for (llvm::CallInst* call : redundant) {
+            call->replaceAllUsesWith(llvm::ConstantInt::get(call->getType(), 1));
+            call->eraseFromParent();
+            ++m_calls_removed;
+        }
+        return !redundant.empty();
+    }
+
+    /// Requires the dominator tree; the CFG is never altered by this pass.
+    void getAnalysisUsage(llvm::AnalysisUsage& AU) const override {
+        AU.addRequired<llvm::DominatorTreeWrapperPass>();
+        AU.setPreservesCFG();
+    }
+};
+}  // namespace
+
+char CheckLayerRemovalPass::ID = 0;
 
 
 void
 LLVM_Util::setup_optimization_passes(int optlevel, bool target_host)
@@ -1767,6 +1860,8 @@ LLVM_Util::setup_optimization_passes(int optlevel, bool target_host)
 
     m_llvm_module_passes = new llvm::legacy::PassManager;
     llvm::legacy::PassManager& mpm = (*m_llvm_module_passes);
+    // TODO: Add based on optlevel
+    mpm.add(new CheckLayerRemovalPass());
 
     llvm::TargetMachine* target_machine = nullptr;
     if (target_host) {
@@ -5968,8
+6063,10 @@ LLVM_Util::bitcode_string(llvm::Module* module) std::string s; llvm::raw_string_ostream stream(s); - for (auto&& func : module->getFunctionList()) - stream << func << '\n'; + module->print(stream, nullptr); + + // for (auto&& func : module->getFunctionList()) + // stream << func << '\n'; return stream.str(); } diff --git a/src/liboslexec/opstring.cpp b/src/liboslexec/opstring.cpp index 94b1916ab..3b667e6fa 100644 --- a/src/liboslexec/opstring.cpp +++ b/src/liboslexec/opstring.cpp @@ -175,7 +175,6 @@ osl_printf(ShaderGlobals* sg, const char* format_str, ...) sg->context->messagefmt("{}", s); } - OSL_SHADEOP void osl_error(ShaderGlobals* sg, const char* format_str, ...) {