Skip to content

Commit

Permalink
First OMPT stuffs
Browse files Browse the repository at this point in the history
  • Loading branch information
cvonelm committed Feb 11, 2025
1 parent 36ce911 commit 48030ec
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 1 deletion.
29 changes: 28 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ include(CheckStructHasMember)
include(CheckFunctionExists)
include(FeatureSummary)
include(GNUInstallDirs)

SET(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}")

include(cmake/DefaultBuildType.cmake)
Expand Down Expand Up @@ -119,6 +118,7 @@ find_package(Radare)
find_package(Audit)
find_package(LibElf REQUIRED)
find_package(Debuginfod)
find_package(OpenMP)


# configurable options
Expand All @@ -140,6 +140,8 @@ CMAKE_DEPENDENT_OPTION(USE_CUPTI "Use CUPTI to record CUDA activity." ON "CUDATo
add_feature_info("USE_CUPTI" USE_CUPTI "Use CUPTI to record CUDA activity.")
CMAKE_DEPENDENT_OPTION(USE_DEBUGINFOD "Use Debuginfod to download debug information on-demand." ON "Debuginfod_FOUND" OFF)
add_feature_info("USE_DEBUGINFOD" USE_DEBUGINFOD "Use Debuginfod to download debug information on-demand.")
# Optional OMPT support. The option is only offered (default ON) when find_package(OpenMP)
# found an OpenMP-capable C++ compiler; otherwise CMAKE_DEPENDENT_OPTION forces it to OFF.
CMAKE_DEPENDENT_OPTION(USE_OMPT "Use OMPT to record OpenMP activity." ON "OpenMP_CXX_FOUND" OFF)
add_feature_info("USE_OMPT" USE_OMPT "Use OMPT to record OpenMP activity.")
# system configuration checks
CHECK_INCLUDE_FILES(linux/hw_breakpoint.h HAVE_HW_BREAKPOINT_H)
CHECK_STRUCT_HAS_MEMBER("struct perf_event_attr" clockid linux/perf_event.h HAVE_PERF_EVENT_ATTR_CLOCKID)
Expand Down Expand Up @@ -357,6 +359,7 @@ target_include_directories(rb_test PRIVATE include ${CMAKE_CURRENT_BINARY_DIR}/i
otf2xx::Writer)

set(LO2S_CUDA_INJECTIONLIB_PATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/liblo2s_injection.so")

if(USE_CUPTI)
if(CUDAToolkit_FOUND)
add_library(lo2s_injection SHARED src/cupti/lib.cpp src/types.cpp)
Expand Down Expand Up @@ -386,6 +389,30 @@ if(USE_CUPTI)
message(SEND_ERROR "Cupti not found but requested.")
endif()
endif()
# OMPT injection library: a shared object built from src/ompt/lib.cpp and installed into
# the library directory. Enabling it also defines HAVE_OMPT on the lo2s target.
# NOTE(review): OpenMP_CXX_FOUND does not by itself show that <omp-tools.h> is available
# for the selected compiler/runtime - confirm whether a header check is needed.
if(USE_OMPT)
    if(NOT OpenMP_CXX_FOUND)
        message(SEND_ERROR "OMPT not found but requested.")
    else()
        add_library(ompt_injection SHARED src/ompt/lib.cpp)

        target_include_directories(ompt_injection
            PRIVATE
                include
                ${CMAKE_CURRENT_BINARY_DIR}/include)

        # Same link order as before: logging/option helpers first, the OpenMP runtime last.
        target_link_libraries(ompt_injection
            PRIVATE
                fmt::fmt
                Nitro::log
                Nitro::env
                Nitro::dl
                Nitro::options
                otf2xx::Writer
                OpenMP::OpenMP_CXX)

        # librt is linked only when SHM_OPEN_FOUND_WITH_RT is set.
        if(SHM_OPEN_FOUND_WITH_RT)
            target_link_libraries(ompt_injection PRIVATE rt)
        endif()

        target_compile_definitions(lo2s PUBLIC HAVE_OMPT)
        install(TARGETS ompt_injection LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
    endif()
endif()



Expand Down
1 change: 1 addition & 0 deletions include/lo2s/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ struct Config
bool use_nvidia = false;
std::string cuda_injectionlib_path;
uint64_t nvidia_ringbuf_size;
bool use_ompt = false;
DwarfUsage dwarf;

std::string socket_path;
Expand Down
12 changes: 12 additions & 0 deletions src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,9 @@ void parse_program_options(int argc, const char** argv)
#ifdef HAVE_VEOSINFO
accelerators.push_back("nec");
#endif
#ifdef HAVE_OMPT
accelerators.push_back("ompt");
#endif

accel_options
.multi_option(
Expand Down Expand Up @@ -613,6 +616,15 @@ void parse_program_options(int argc, const char** argv)
#else
std::cerr << "lo2s was built without support for CUDA kernel recording\n";
std::exit(EXIT_FAILURE);
#endif
}
else if (accel == "ompt")
{
    // "ompt" enables OMPT recording when lo2s was compiled with HAVE_OMPT; a build without
    // it rejects the request with a message that names the missing feature.
#ifdef HAVE_OMPT
    config.use_ompt = true;
#else
    std::cerr << "lo2s was built without support for OpenMP (OMPT) recording\n";
    std::exit(EXIT_FAILURE);
#endif
}
else
Expand Down
11 changes: 11 additions & 0 deletions src/monitor/process_monitor_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,17 @@ std::vector<char*> to_vector_of_c_str(const std::vector<std::string>& vec)
}
}
#endif
#ifdef HAVE_OMPT
// When OMPT recording is enabled in the configuration, append the OpenMP tool-interface
// variables to the environment entries collected in `env`.
if (config().use_ompt)
{
// NOTE(review): this is emitted at error level although it reads like an informational
// message - confirm the intended log level.
Log::error() << "Using ompt!";
// OMP_TOOL / OMP_TOOL_LIBRARIES name the tool library by its bare file name.
env.push_back("OMP_TOOL=enabled");
env.push_back("OMP_TOOL_LIBRARIES=libompt_injection.so");
// FIXME(review): hard-coded path to one developer's build directory; this cannot work on
// other machines or for an installed lo2s. It also adds a new LD_LIBRARY_PATH entry instead
// of extending an existing one - verify how duplicates in `env` are resolved.
env.push_back("LD_LIBRARY_PATH=/home/cvonelm/dev/lo2s/build");
// TODO(review): leftover from the CUDA variant (config().cuda_injectionlib_path); presumably
// a configurable injection library path is meant to replace the literal above.
}
#endif

std::vector<char*> c_env = to_vector_of_c_str(env);
std::vector<char*> c_args = to_vector_of_c_str(command_and_args);

Expand Down
117 changes: 117 additions & 0 deletions src/ompt/lib.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#include <omp-tools.h>
#include <omp.h>

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <sys/resource.h>

#include <lo2s/ringbuf.hpp>

// Numeric identifiers for the two named events; used as the keys of cctx_map below.
#define PARALLEL_BEGIN 1
#define PARALLEL_END 2
// Ring buffer writer of this tool. Null until ompt_initialize() creates it.
std::unique_ptr<lo2s::RingbufWriter> rb_writer = nullptr;
// Clock read by timestampfunc(). Starts as CLOCK_MONOTONIC_RAW and is overwritten in
// ompt_initialize() with the clock id stored in the ring buffer header.
clockid_t clockid = CLOCK_MONOTONIC_RAW;
// Maps the event identifiers above to their textual names.
std::map<uint64_t, std::string> cctx_map = { { PARALLEL_BEGIN, "parallel_begin" },
{ PARALLEL_END, "parallel_end" } };

/**
 * Reads the clock selected by the global `clockid` and converts the reading to nanoseconds.
 *
 * The function no longer writes the clock id and the result to std::cerr on every call;
 * that debug output ended up in the error stream of the process this library is loaded into.
 *
 * @return tv_sec * 10^9 + tv_nsec of the clock reading, or 0 if clock_gettime() fails.
 */
uint64_t timestampfunc()
{
    struct timespec ts;
    if (clock_gettime(clockid, &ts) != 0)
    {
        // Never derive a timestamp from an uninitialized timespec.
        return 0;
    }

    // Widen before multiplying so the product is computed in 64 bits even on platforms
    // where time_t is only 32 bits wide.
    return static_cast<uint64_t>(ts.tv_sec) * 1000000000ULL + static_cast<uint64_t>(ts.tv_nsec);
}

// Handler for ompt_callback_implicit_task; registered in ompt_initialize().
// Placeholder: the body is empty, so no argument is read and nothing is recorded.
static void on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,
ompt_data_t* parallel_data, ompt_data_t* task_data,
unsigned int actual_parallelism, unsigned int index,
int flags)
{
}

// Handler for ompt_callback_parallel_begin; registered in ompt_initialize().
// For now it only writes a fixed marker (without a trailing newline) to stdout and
// ignores all of its arguments.
static void on_ompt_callback_parallel_begin(ompt_data_t* encountering_task_data,
                                            const ompt_frame_t* encountering_task_frame,
                                            ompt_data_t* parallel_data,
                                            uint32_t requested_parallelism, int flags,
                                            const void* codeptr_ra)
{
    fputs("PARALLEL_BEGIN!", stdout);
}

// Handler for ompt_callback_task_create; registered in ompt_initialize().
// Placeholder: the body is empty, so no argument is read and nothing is recorded.
static void on_ompt_callback_task_create(
ompt_data_t* encountering_task_data, /* id of parent task */
const ompt_frame_t* encountering_task_frame, /* frame data for parent task */
ompt_data_t* new_task_data, /* id of created task */
int flags, int has_dependences, const void* codeptr_ra) /* pointer to outlined function */
{
}

// Handler for ompt_callback_task_schedule; registered in ompt_initialize().
// Placeholder: the body is empty, so no argument is read and nothing is recorded.
static void on_ompt_callback_task_schedule(ompt_data_t* prior_task_data,
ompt_task_status_t prior_task_status,
ompt_data_t* next_task_data)
{
}

// Handler for ompt_callback_thread_begin; registered in ompt_initialize().
// Placeholder: the body is empty, so no argument is read and nothing is recorded.
static void on_ompt_callback_thread_begin(ompt_thread_t thread_type, ompt_data_t* thread_data)
{
}

// Handler for ompt_callback_parallel_end; registered in ompt_initialize().
// For now it only writes a fixed marker (without a trailing newline) to stdout and
// ignores all of its arguments.
static void on_ompt_callback_parallel_end(ompt_data_t* parallel_data,
                                          ompt_data_t* encountering_task_data, int flags,
                                          const void* codeptr_ra)
{
    fputs("PARALLEL_END!", stdout);
}

// Handler for ompt_callback_thread_end; registered in ompt_initialize().
// Placeholder: the body is empty, so no argument is read and nothing is recorded.
static void on_ompt_callback_thread_end(ompt_data_t* thread_data)
{
}

// register_callback_t(name, type): registers the handler on_<name> for the OMPT event <name>.
// The handler address is first stored in a variable of type `type`, so a handler whose
// signature does not match `type` fails to compile; it is then cast to ompt_callback_t and
// passed to ompt_set_callback. A callable named `ompt_set_callback` must be in scope where
// the macro is expanded. If registration returns ompt_set_never, a message is printed to
// stdout.
#define register_callback_t(name, type) \
do \
{ \
type f_##name = &on_##name; \
if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \
printf("0: Could not register callback '" #name "'\n"); \
} while (0)

// register_callback(name): shorthand that uses <name>_t as the handler type.
#define register_callback(name) register_callback_t(name, name##_t)

/**
 * Tool initializer; its address is placed in the result structure returned by
 * ompt_start_tool().
 *
 * Resolves the runtime's "ompt_set_callback" entry point, creates the ring buffer writer for
 * the current process, waits until the writer reports ready, adopts the clock id from the
 * ring buffer header and registers all on_ompt_callback_* handlers.
 *
 * @param lookup             runtime-provided function used to resolve "ompt_set_callback"
 * @param initial_device_num unused
 * @param tool_data          unused
 * @return 1 on success; 0 if "ompt_set_callback" cannot be resolved. In that case nothing
 *         is set up, instead of calling through a null function pointer.
 */
int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num, ompt_data_t* tool_data)
{
    std::cout << "Initializing injection!" << std::endl;

    // Resolve the registration entry point before doing any other work: without it no
    // callback can be registered, so there is no point in creating the ring buffer.
    ompt_set_callback_t ompt_set_callback =
        reinterpret_cast<ompt_set_callback_t>(lookup("ompt_set_callback"));
    if (ompt_set_callback == nullptr)
    {
        std::cerr << "Could not look up 'ompt_set_callback', OMPT recording is disabled"
                  << std::endl;
        return 0;
    }

    pid_t pid = getpid();
    // NOTE(review): the last argument is the literal "cuda"; it looks copied from the CUDA
    // injection library - confirm whether OMPT needs its own value here.
    rb_writer =
        std::make_unique<lo2s::RingbufWriter>(16, lo2s::ExecutionScope(lo2s::Process(pid)), "cuda");

    // Busy-wait until the writer reports ready.
    while (!rb_writer->ready())
    {
    }
    clockid = rb_writer->header()->clockid;

    // The macro below expands to calls on the local `ompt_set_callback` resolved above.
    register_callback(ompt_callback_implicit_task);
    register_callback(ompt_callback_parallel_begin);
    register_callback(ompt_callback_parallel_end);
    register_callback(ompt_callback_task_create);
    register_callback(ompt_callback_task_schedule);
    register_callback(ompt_callback_thread_begin);
    register_callback(ompt_callback_thread_end);

    return 1; // success
}

// Tool finalizer; its address is placed in the result structure returned by
// ompt_start_tool(). Placeholder: the body is empty and tool_data is not used.
void ompt_finalize(ompt_data_t* tool_data)
{
}

/**
 * Entry point of the tool library.
 *
 * Declared extern "C" explicitly so that the symbol is exported under its unmangled name
 * regardless of how (or whether) <omp-tools.h> declares it; an OpenMP runtime that resolves
 * the tool by the name "ompt_start_tool" could not find a C++-mangled symbol.
 *
 * @param omp_version     unused
 * @param runtime_version unused
 * @return pointer to a function-local static structure holding ompt_initialize,
 *         ompt_finalize and a zero-initialized tool data value.
 */
extern "C" ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version,
                                                     const char* runtime_version)
{
    // Static storage: the returned pointer stays valid after this function returns.
    static ompt_start_tool_result_t ompt_start_tool_result = { &ompt_initialize,
                                                               &ompt_finalize,
                                                               { .value = 0 } };
    return &ompt_start_tool_result;
}

0 comments on commit 48030ec

Please sign in to comment.