diff --git a/build-all.sh b/build-all.sh index 5833313d6..993c2b011 100644 --- a/build-all.sh +++ b/build-all.sh @@ -12,7 +12,6 @@ make -f $ROOT_DIR/profiling/memory-usage/Makefile make -f $ROOT_DIR/profiling/nvtx-connector/Makefile make -f $ROOT_DIR/profiling/nvtx-focused-connector/Makefile make -f $ROOT_DIR/profiling/papi-connector/Makefile -make -f $ROOT_DIR/profiling/simple-kernel-timer-json/Makefile make -f $ROOT_DIR/profiling/simple-kernel-timer/Makefile make -f $ROOT_DIR/profiling/space-time-stack/Makefile make -f $ROOT_DIR/profiling/systemtap-connector/Makefile diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 88b3b5ac4..41980d163 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -21,7 +21,6 @@ endmacro() # and exported output in expected format, fail the test otherwise. if(NOT WIN32) add_kp_test(kernel_timer "kernel-timer") - add_kp_test(kernel_timer_json "kernel-timer-json") add_kp_test(memory_events "memory-events") add_kp_test(memory_usage "memory-usage") add_kp_test(chrome_tracing "chrome-tracing") diff --git a/profiling/all/kp_all.cpp b/profiling/all/kp_all.cpp index 1c8691274..52bc9bad9 100644 --- a/profiling/all/kp_all.cpp +++ b/profiling/all/kp_all.cpp @@ -30,7 +30,6 @@ #ifndef WIN32 KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimer) -KOKKOSTOOLS_EXTERN_EVENT_SET(KernelTimerJSON) KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryEvents) KOKKOSTOOLS_EXTERN_EVENT_SET(MemoryUsage) KOKKOSTOOLS_EXTERN_EVENT_SET(HighwaterMark) @@ -69,10 +68,9 @@ namespace KokkosTools { EventSet get_event_set(const char* profiler, const char* config_str) { std::map handlers; #ifndef WIN32 - handlers["kernel-timer"] = KernelTimer::get_event_set(); - handlers["kernel-timer-json"] = KernelTimerJSON::get_event_set(); - handlers["memory-events"] = MemoryEvents::get_event_set(); - handlers["memory-usage"] = MemoryUsage::get_event_set(); + handlers["kernel-timer"] = KernelTimer::get_event_set(); + handlers["memory-events"] = MemoryEvents::get_event_set(); + handlers["memory-usage"] = MemoryUsage::get_event_set(); #if USE_MPI handlers["highwater-mark-mpi"] = HighwaterMarkMPI::get_event_set(); #endif diff --git a/profiling/simple-kernel-timer/CMakeLists.txt b/profiling/simple-kernel-timer/CMakeLists.txt index e512a1d67..ebd05a6a8 100644 --- a/profiling/simple-kernel-timer/CMakeLists.txt +++ b/profiling/simple-kernel-timer/CMakeLists.txt @@ -7,10 +7,6 @@ if(NOT MSVC) set_property(TARGET kp_kernel_shared PROPERTY POSITION_INDEPENDENT_CODE ON) endif() -# Add JSON kernel-timer -kp_add_library(kp_kernel_timer_json kp_kernel_timer_json.cpp) -target_link_libraries(kp_kernel_timer_json PRIVATE kp_kernel_shared) - # Add binary kernel-timer kp_add_library(kp_kernel_timer kp_kernel_timer.cpp) target_link_libraries(kp_kernel_timer PRIVATE kp_kernel_shared) diff --git a/profiling/simple-kernel-timer/kp_kernel_timer.cpp b/profiling/simple-kernel-timer/kp_kernel_timer.cpp index 33187c9c4..a015f5589 100644 --- a/profiling/simple-kernel-timer/kp_kernel_timer.cpp +++ b/profiling/simple-kernel-timer/kp_kernel_timer.cpp @@ -15,6 +15,7 @@ //@HEADER #include +#include #include #include #include @@ -25,6 +26,10 @@ namespace KokkosTools { namespace KernelTimer { +bool is_region(KernelPerformanceInfo const& kp) { + return kp.getKernelType() == REGION; +} + void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, const uint32_t /*devInfoCount*/, Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { @@ -52,23 +57,90 @@ void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, void kokkosp_finalize_library() { double finishTime = seconds(); + const char* kokkos_tools_timer_json_raw = getenv("KOKKOS_TOOLS_TIMER_JSON"); + const bool kokkos_tools_timer_json = + kokkos_tools_timer_json_raw == NULL + ? false + : strcmp(kokkos_tools_timer_json_raw, "1") == 0 || + strcmp(kokkos_tools_timer_json_raw, "true") == 0 || + strcmp(kokkos_tools_timer_json_raw, "True") == 0; + + double kernelTimes = 0; + char* hostname = (char*)malloc(sizeof(char) * 256); gethostname(hostname, 256); char* fileOutput = (char*)malloc(sizeof(char) * 256); - snprintf(fileOutput, 256, "%s-%d.dat", hostname, (int)getpid()); + snprintf(fileOutput, 256, "%s-%d.%s", hostname, (int)getpid(), + kokkos_tools_timer_json ? "json" : "dat"); free(hostname); FILE* output_data = fopen(fileOutput, "wb"); const double totalExecuteTime = (finishTime - initTime); - fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data); + if (!kokkos_tools_timer_json) { + fwrite(&totalExecuteTime, sizeof(totalExecuteTime), 1, output_data); - std::vector kernelList; + for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); + kernel_itr++) { + kernel_itr->second->writeToBinaryFile(output_data); + } + } else { + std::vector kernelList; + + for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); + kernel_itr++) { + kernelList.push_back(kernel_itr->second); + kernelTimes += kernel_itr->second->getTime(); + } + + std::sort(kernelList.begin(), kernelList.end(), + compareKernelPerformanceInfo); + + fprintf(output_data, "{\n\"kokkos-kernel-data\" : {\n"); + fprintf(output_data, " \"total-app-time\" : %10.3f,\n", + totalExecuteTime); + fprintf(output_data, " \"total-kernel-times\" : %10.3f,\n", + kernelTimes); + fprintf(output_data, " \"total-non-kernel-times\" : %10.3f,\n", + (totalExecuteTime - kernelTimes)); + + const double percentKokkos = (kernelTimes / totalExecuteTime) * 100.0; + fprintf(output_data, " \"percent-in-kernels\" : %6.2f,\n", + percentKokkos); + fprintf(output_data, " \"unique-kernel-calls\" : %22llu,\n", + (unsigned long long)count_map.size()); + fprintf(output_data, "\n"); + + fprintf(output_data, " \"region-perf-info\" : [\n"); + +#define KERNEL_INFO_INDENT " " + + bool print_comma = false; + for (auto const& kernel : count_map) { + if (!is_region(*std::get<1>(kernel))) continue; + if (print_comma) fprintf(output_data, ",\n"); + kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT); + print_comma = true; + } + + fprintf(output_data, "\n"); + fprintf(output_data, " ],\n"); + + fprintf(output_data, " \"kernel-perf-info\" : [\n"); + + print_comma = false; + for (auto const& kernel : count_map) { + if (is_region(*std::get<1>(kernel))) continue; + if (print_comma) fprintf(output_data, ",\n"); + kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT); + print_comma = true; + } + + fprintf(output_data, "\n"); + fprintf(output_data, " ]\n"); - for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); - kernel_itr++) { - kernel_itr->second->writeToBinaryFile(output_data); + fprintf(output_data, "}\n}"); } fclose(output_data); diff --git a/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp b/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp deleted file mode 100644 index 859fa3d7b..000000000 --- a/profiling/simple-kernel-timer/kp_kernel_timer_json.cpp +++ /dev/null @@ -1,191 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#include -#include -#include -#include - -#include "kp_core.hpp" -#include "kp_shared.h" - -using namespace KokkosTools::KernelTimer; - -namespace KokkosTools { -namespace KernelTimerJSON { - -void kokkosp_init_library(const int loadSeq, const uint64_t interfaceVer, - const uint32_t /*devInfoCount*/, - Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) { - const char* output_delim_env = getenv("KOKKOSP_OUTPUT_DELIM"); - if (NULL == output_delim_env) { - outputDelimiter = (char*)malloc(sizeof(char) * 2); - snprintf(outputDelimiter, 2, "%c", ' '); - } else { - outputDelimiter = - (char*)malloc(sizeof(char) * (strlen(output_delim_env) + 1)); - strcpy(outputDelimiter, output_delim_env); - } - - printf( - "KokkosP: LDMS JSON Connector Initialized (sequence is %d, version: " - "%llu)\n", - loadSeq, (long long unsigned int)interfaceVer); - - initTime = seconds(); -} - -void kokkosp_finalize_library() { - double finishTime = seconds(); - double kernelTimes = 0; - - char* mpi_rank = getenv("OMPI_COMM_WORLD_RANK"); - - char* hostname = (char*)malloc(sizeof(char) * 256); - gethostname(hostname, 256); - - char* fileOutput = (char*)malloc(sizeof(char) * 256); - snprintf(fileOutput, 256, "%s-%d-%s.json", hostname, (int)getpid(), - (NULL == mpi_rank) ? "0" : mpi_rank); - - free(hostname); - FILE* output_data = fopen(fileOutput, "w"); - - const double totalExecuteTime = (finishTime - initTime); - std::vector kernelList; - - for (auto kernel_itr = count_map.begin(); kernel_itr != count_map.end(); - kernel_itr++) { - kernelList.push_back(kernel_itr->second); - kernelTimes += kernel_itr->second->getTime(); - } - - std::sort(kernelList.begin(), kernelList.end(), compareKernelPerformanceInfo); - - fprintf(output_data, "{\n\"kokkos-kernel-data\" : {\n"); - fprintf(output_data, " \"mpi-rank\" : %s,\n", - (NULL == mpi_rank) ? "0" : mpi_rank); - fprintf(output_data, " \"total-app-time\" : %10.3f,\n", - totalExecuteTime); - fprintf(output_data, " \"total-kernel-times\" : %10.3f,\n", - kernelTimes); - fprintf(output_data, " \"total-non-kernel-times\" : %10.3f,\n", - (totalExecuteTime - kernelTimes)); - - const double percentKokkos = (kernelTimes / totalExecuteTime) * 100.0; - fprintf(output_data, " \"percent-in-kernels\" : %6.2f,\n", - percentKokkos); - fprintf(output_data, " \"unique-kernel-calls\" : %22llu,\n", - (unsigned long long)count_map.size()); - fprintf(output_data, "\n"); - - fprintf(output_data, " \"kernel-perf-info\" : [\n"); - -#define KERNEL_INFO_INDENT " " - - bool print_comma = false; - for (auto const& kernel : count_map) { - if (print_comma) fprintf(output_data, ",\n"); - kernel.second->writeToJSONFile(output_data, KERNEL_INFO_INDENT); - print_comma = true; - } - - fprintf(output_data, "\n"); - fprintf(output_data, " ]\n"); - fprintf(output_data, "}\n}"); - fclose(output_data); -} - -void kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, - uint64_t* kID) { - *kID = uniqID++; - - if ((NULL == name) || (strcmp("", name) == 0)) { - fprintf(stderr, "Error: kernel is empty\n"); - exit(-1); - } - - increment_counter(name, PARALLEL_FOR); -} - -void kokkosp_end_parallel_for(const uint64_t /*kID*/) { - currentEntry->addFromTimer(); -} - -void kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, - uint64_t* kID) { - *kID = uniqID++; - - if ((NULL == name) || (strcmp("", name) == 0)) { - fprintf(stderr, "Error: kernel is empty\n"); - exit(-1); - } - - increment_counter(name, PARALLEL_SCAN); -} - -void kokkosp_end_parallel_scan(const uint64_t /*kID*/) { - currentEntry->addFromTimer(); -} - -void kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, - uint64_t* kID) { - *kID = uniqID++; - - if ((NULL == name) || (strcmp("", name) == 0)) { - fprintf(stderr, "Error: kernel is empty\n"); - exit(-1); - } - - increment_counter(name, PARALLEL_REDUCE); -} - -void kokkosp_end_parallel_reduce(const uint64_t /*kID*/) { - currentEntry->addFromTimer(); -} - -Kokkos::Tools::Experimental::EventSet get_event_set() { - Kokkos::Tools::Experimental::EventSet my_event_set; - memset(&my_event_set, 0, - sizeof(my_event_set)); // zero any pointers not set here - my_event_set.init = kokkosp_init_library; - my_event_set.finalize = kokkosp_finalize_library; - my_event_set.begin_parallel_for = kokkosp_begin_parallel_for; - my_event_set.begin_parallel_reduce = kokkosp_begin_parallel_reduce; - my_event_set.begin_parallel_scan = kokkosp_begin_parallel_scan; - my_event_set.end_parallel_for = kokkosp_end_parallel_for; - my_event_set.end_parallel_reduce = kokkosp_end_parallel_reduce; - my_event_set.end_parallel_scan = kokkosp_end_parallel_scan; - return my_event_set; -} - -} // namespace KernelTimerJSON -} // namespace KokkosTools - -extern "C" { - -namespace impl = KokkosTools::KernelTimerJSON; - -EXPOSE_INIT(impl::kokkosp_init_library) -EXPOSE_FINALIZE(impl::kokkosp_finalize_library) -EXPOSE_BEGIN_PARALLEL_FOR(impl::kokkosp_begin_parallel_for) -EXPOSE_END_PARALLEL_FOR(impl::kokkosp_end_parallel_for) -EXPOSE_BEGIN_PARALLEL_SCAN(impl::kokkosp_begin_parallel_scan) -EXPOSE_END_PARALLEL_SCAN(impl::kokkosp_end_parallel_scan) -EXPOSE_BEGIN_PARALLEL_REDUCE(impl::kokkosp_begin_parallel_reduce) -EXPOSE_END_PARALLEL_REDUCE(impl::kokkosp_end_parallel_reduce) - -} // extern "C"