Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge kp-kernels-timers #235

Merged
merged 9 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion build-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ make -f $ROOT_DIR/profiling/memory-usage/Makefile
make -f $ROOT_DIR/profiling/nvtx-connector/Makefile
make -f $ROOT_DIR/profiling/nvtx-focused-connector/Makefile
make -f $ROOT_DIR/profiling/papi-connector/Makefile
make -f $ROOT_DIR/profiling/simple-kernel-timer-json/Makefile
make -f $ROOT_DIR/profiling/simple-kernel-timer/Makefile
make -f $ROOT_DIR/profiling/space-time-stack/Makefile
make -f $ROOT_DIR/profiling/systemtap-connector/Makefile
Expand Down
1 change: 0 additions & 1 deletion example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ endmacro()
# and exported output in expected format, fail the test otherwise.
if(NOT WIN32)
add_kp_test(kernel_timer "kernel-timer")
add_kp_test(kernel_timer_json "kernel-timer-json")
add_kp_test(memory_events "memory-events")
add_kp_test(memory_usage "memory-usage")
add_kp_test(chrome_tracing "chrome-tracing")
Expand Down
8 changes: 3 additions & 5 deletions profiling/all/kp_all.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@

#ifndef WIN32
KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimer)
KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimerJSON)
KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryEvents)
KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryUsage)
KOKKOSTOOLS_EXTERN_EVENT_SET(HighwaterMark)
Expand Down Expand Up @@ -69,10 +68,9 @@ namespace KokkosTools {
EventSet get_event_set(const char* profiler, const char* config_str) {
std::map<std::string, EventSet> handlers;
#ifndef WIN32
handlers["kernel-timer"] = KernelTimer::get_event_set();
handlers["kernel-timer-json"] = KernelTimerJSON::get_event_set();
handlers["memory-events"] = MemoryEvents::get_event_set();
handlers["memory-usage"] = MemoryUsage::get_event_set();
handlers["kernel-timer"] = KernelTimer::get_event_set();
handlers["memory-events"] = MemoryEvents::get_event_set();
handlers["memory-usage"] = MemoryUsage::get_event_set();
#if USE_MPI
handlers["highwater-mark-mpi"] = HighwaterMarkMPI::get_event_set();
#endif
Expand Down
4 changes: 0 additions & 4 deletions profiling/simple-kernel-timer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@ if(NOT MSVC)
set_property(TARGET kp_kernel_shared PROPERTY POSITION_INDEPENDENT_CODE ON)
endif()

# Add JSON kernel-timer
kp_add_library(kp_kernel_timer_json kp_kernel_timer_json.cpp)
target_link_libraries(kp_kernel_timer_json PRIVATE kp_kernel_shared)

# Add binary kernel-timer
kp_add_library(kp_kernel_timer kp_kernel_timer.cpp)
target_link_libraries(kp_kernel_timer PRIVATE kp_kernel_shared)
Expand Down
84 changes: 78 additions & 6 deletions profiling/simple-kernel-timer/kp_kernel_timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
//@HEADER

#include <vector>
#include <algorithm>
#include <string>
#include <iostream>
#include <unistd.h>
Expand All @@ -25,6 +26,10 @@
namespace KokkosTools {
namespace KernelTimer {

bool is_region(KernelPerformanceInfo const& kp) {
return kp.getKernelType() == REGION;
}

void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
const uint32_t /*devInfoCount*/,
Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) {
Expand Down Expand Up @@ -52,23 +57,90 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer,
void kokkosp_finalize_library() {
double finishTime = seconds();

const char* kokkos_tools_timer_json_raw = getenv("KOKKOS_TOOLS_TIMER_JSON");
const bool kokkos_tools_timer_json =
kokkos_tools_timer_json_raw == NULL
? false
: strcmp(kokkos_tools_timer_json_raw, "1") == 0 ||
strcmp(kokkos_tools_timer_json_raw, "true") == 0 ||
strcmp(kokkos_tools_timer_json_raw, "True") == 0;

double kernelTimes = 0;

char* hostname = (char*)malloc(sizeof(char) * 256);
gethostname(hostname, 256);

char* fileOutput = (char*)malloc(sizeof(char) * 256);
snprintf(fileOutput, 256, "%s-%d.dat", hostname, (int)getpid());
snprintf(fileOutput, 256, "%s-%d.%s", hostname, (int)getpid(),
kokkos_tools_timer_json ? "json" : "dat");

free(hostname);
FILE* output_data = fopen(fileOutput, "wb");

const double totalExecuteTime = (finishTime - initTime);
fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data);
if (!kokkos_tools_timer_json) {
fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data);

std::vector<KernelPerformanceInfo*> kernelList;
for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end();
kernel_itr++) {
kernel_itr->second->writeToBinaryFile(output_data);
}
} else {
std::vector<KernelPerformanceInfo*> kernelList;

for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end();
kernel_itr++) {
kernelList.push_back(kernel_itr->second);
kernelTimes += kernel_itr->second->getTime();
}

std::sort(kernelList.begin(), kernelList.end(),
blegouix marked this conversation as resolved.
Show resolved Hide resolved
compareKernelPerformanceInfo);

fprintf(output_data, "{\n\"kokkos-kernel-data\" : {\n");
fprintf(output_data, " \"total-app-time\" : %10.3f,\n",
totalExecuteTime);
fprintf(output_data, " \"total-kernel-times\" : %10.3f,\n",
kernelTimes);
fprintf(output_data, " \"total-non-kernel-times\" : %10.3f,\n",
(totalExecuteTime - kernelTimes));

const double percentKokkos = (kernelTimes / totalExecuteTime) * 100.0;
fprintf(output_data, " \"percent-in-kernels\" : %6.2f,\n",
Comment on lines +103 to +109
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that aggregates should be part of the JSON report.

It makes sense that "console" reports do aggregate times so the user is presented with a nice summary.

But IMO JSON output should be as raw as possible, since I guess the sole purpose of JSON format is to post-process results somewhere else in a user-specific way.

I think it is not valuable that Kokkos Tools adds aggregated metrics in the JSON output...

Copy link
Contributor Author

@blegouix blegouix Feb 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe but it is not in the scope of the MR. It was already there in the timer_json.cpp

(but actually as a user I would say that if you remove aggregated metrics in the output you should provide an easy way to post-process automatically (like execute python script when kokkos finalizes), otherwise it makes things more complicated than they should for people who just want to monitor the performance of their code in development stage)

percentKokkos);
fprintf(output_data, " \"unique-kernel-calls\" : %22llu,\n",
(unsigned long long)count_map.size());
fprintf(output_data, "\n");

fprintf(output_data, " \"region-perf-info\" : [\n");

#define KERNEL_INFO_INDENT " "

bool print_comma = false;
for (auto const& kernel : count_map) {
if (!is_region(*std::get<1>(kernel))) continue;
if (print_comma) fprintf(output_data, ",\n");
kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT);
print_comma = true;
}
dalg24 marked this conversation as resolved.
Show resolved Hide resolved

fprintf(output_data, "\n");
fprintf(output_data, " ],\n");

fprintf(output_data, " \"kernel-perf-info\" : [\n");

print_comma = false;
for (auto const& kernel : count_map) {
if (is_region(*std::get<1>(kernel))) continue;
if (print_comma) fprintf(output_data, ",\n");
kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT);
print_comma = true;
}

fprintf(output_data, "\n");
fprintf(output_data, " ]\n");

for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end();
kernel_itr++) {
kernel_itr->second->writeToBinaryFile(output_data);
fprintf(output_data, "}\n}");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to have a test for this. Otherwise, how could a reviewer be sure that nothing broke ?

}

fclose(output_data);
Expand Down
191 changes: 0 additions & 191 deletions profiling/simple-kernel-timer/kp_kernel_timer_json.cpp

This file was deleted.

Loading