Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic expansion of thread data #294

Merged
merged 32 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
c8454d3
Tests for exceeding OMNITRACE_MAX_THREADS
jrmadsen Jun 22, 2023
3ccd340
CMake Formatting.cmake update
jrmadsen Jun 22, 2023
046cc41
Add unknown-hash= to OMNITRACE_ABORT_FAIL_REGEX
jrmadsen Jun 22, 2023
f4b6160
Tests for exceeding OMNITRACE_MAX_THREADS
jrmadsen Jun 30, 2023
b32a2d2
omnitrace-sample update
jrmadsen Jun 30, 2023
3398838
core library update
jrmadsen Jun 30, 2023
b7f57f8
core library update (containers)
jrmadsen Jun 30, 2023
5d864b0
thread_info update
jrmadsen Jun 30, 2023
7e54dd1
thread_data update
jrmadsen Jun 30, 2023
926bf2a
causal update
jrmadsen Jun 30, 2023
978f40b
backtrace/backtrace_metrics component update
jrmadsen Jun 30, 2023
a06c5c8
pthread_gotcha component update
jrmadsen Jun 30, 2023
0a95cbb
pthread_create_gotcha component update
jrmadsen Jun 30, 2023
1bc41cb
rocprofiler/roctracer component update
jrmadsen Jun 30, 2023
5a59ed1
critical trace (library) update
jrmadsen Jun 30, 2023
dcc3a0f
coverage update
jrmadsen Jun 30, 2023
ac4de46
tasking update
jrmadsen Jun 30, 2023
caee9cf
roctracer update
jrmadsen Jun 30, 2023
7bc46d3
rocm_smi update
jrmadsen Jun 30, 2023
b86dfc4
runtime.cpp update
jrmadsen Jun 30, 2023
f578990
sampling.cpp update
jrmadsen Jun 30, 2023
19e3539
ompt.cpp update
jrmadsen Jun 30, 2023
8c97658
tracing.hpp and tracing.cpp update
jrmadsen Jun 30, 2023
fbc0694
library.cpp update
jrmadsen Jun 30, 2023
ac18284
Update BuildSettings.cmake
jrmadsen Jun 30, 2023
379263f
Update fork example
jrmadsen Jun 30, 2023
72ff175
Update lib/common/defines.h.in
jrmadsen Jun 30, 2023
a6bf846
Update rocprofiler.hpp
jrmadsen Jun 30, 2023
6016fd0
Update aligned_static_vector
jrmadsen Jun 30, 2023
914cd53
Update tracing.cpp
jrmadsen Jun 30, 2023
4771355
Update pthread_create_gotcha.cpp
jrmadsen Jun 30, 2023
150703c
Update causal end to end tests
jrmadsen Jun 30, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/BuildSettings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ endif()
#
add_flag_if_avail(
"-W" "-Wall" "-Wno-unknown-pragmas" "-Wno-unused-function" "-Wno-ignored-attributes"
"-Wno-attributes" "-Wno-missing-field-initializers")
"-Wno-attributes" "-Wno-missing-field-initializers" "-Wno-interference-size")

if(OMNITRACE_BUILD_DEBUG)
add_flag_if_avail("-g3" "-fno-omit-frame-pointer")
Expand Down
3 changes: 3 additions & 0 deletions cmake/Formatting.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ if(OMNITRACE_CLANG_FORMAT_EXE
file(GLOB_RECURSE examples ${PROJECT_SOURCE_DIR}/examples/*.cpp
${PROJECT_SOURCE_DIR}/examples/*.c ${PROJECT_SOURCE_DIR}/examples/*.hpp
${PROJECT_SOURCE_DIR}/examples/*.h)
file(GLOB_RECURSE tests_source ${PROJECT_SOURCE_DIR}/tests/source/*.cpp
${PROJECT_SOURCE_DIR}/tests/source/*.hpp)
file(GLOB_RECURSE external ${PROJECT_SOURCE_DIR}/examples/lulesh/external/kokkos/*)
file(
GLOB_RECURSE
Expand All @@ -86,6 +88,7 @@ if(OMNITRACE_CLANG_FORMAT_EXE
add_custom_target(
format-omnitrace-source
${OMNITRACE_CLANG_FORMAT_EXE} -i ${sources} ${headers} ${examples}
${tests_source}
COMMENT "[omnitrace] Running C++ formatter ${OMNITRACE_CLANG_FORMAT_EXE}...")
endif()

Expand Down
102 changes: 72 additions & 30 deletions examples/fork/fork.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <pthread.h>
#include <set>
#include <string>
#include <sys/wait.h>
#include <thread>
Expand All @@ -24,71 +26,111 @@ print_info(const char* _name)
int
run(const char* _name, int nchildren)
{
auto _threads = std::vector<std::thread>{};
auto _barrier = pthread_barrier_t{};
auto _threads = std::vector<std::thread>{};
auto _children = std::vector<pid_t>{};
_children.resize(nchildren, 0);
pthread_barrier_init(&_barrier, nullptr, nchildren + 1);
for(int i = 0; i < nchildren; ++i)
{
omnitrace_user_push_region("launch_child");
auto _run = [i, _name]() {
pid_t _pid = fork();
if(_pid == 0)
auto _run = [&_barrier, &_children, i, _name](uint64_t _nsec) {
pthread_barrier_wait(&_barrier);
_children.at(i) = fork();
if(_children.at(i) == 0)
{
// child code
print_info(_name);
printf("[%s][%i] child job starting...\n", _name, getpid());
auto _sleep = [=]() {
std::this_thread::sleep_for(std::chrono::seconds{ i + 1 });
omnitrace_user_push_region("child_process_child_thread");
std::this_thread::sleep_for(std::chrono::seconds{ _nsec });
omnitrace_user_pop_region("child_process_child_thread");
};
omnitrace_user_push_region("child_process");
std::thread{ _sleep }.join();
omnitrace_user_push_region("child_process");
printf("[%s][%i] child job complete\n", _name, getpid());
exit(EXIT_SUCCESS);
}
else
{
pthread_barrier_wait(&_barrier);
}
};
_threads.emplace_back(_run);
_threads.emplace_back(_run, i + 1);
omnitrace_user_pop_region("launch_child");
}

// all child threads should start executing their fork once this returns
pthread_barrier_wait(&_barrier);
// wait for the threads to successfully fork
pthread_barrier_wait(&_barrier);

omnitrace_user_push_region("wait_for_children");

int _status = 0;
pid_t _wait_pid = 0;
// parent waits for all the child processes
while((_wait_pid = wait(&_status)) > 0)
for(auto& itr : _children)
{
printf("[%s][%i] returned from wait with pid = %i :: ", _name, getpid(),
_wait_pid);
if(WIFEXITED(_status))
{
printf("exited, status=%d\n", WEXITSTATUS(_status));
}
else if(WIFSIGNALED(_status))
{
printf("killed by signal %d\n", WTERMSIG(_status));
}
else if(WIFSTOPPED(_status))
while(itr == 0)
{}
printf("[%s][%i] performing waitpid(%i, ...)\n", _name, getpid(), itr);
while((_wait_pid = waitpid(itr, &_status, WUNTRACED | WNOHANG)) <= 0)
{
printf("stopped by signal %d\n", WSTOPSIG(_status));
}
else if(WIFCONTINUED(_status))
{
printf("continued\n");
}
else
{
printf("unknown\n");
if(_wait_pid == 0) continue;

printf("[%s][%i] returned from waitpid(%i) with pid = %i (status = %i) :: ",
_name, getpid(), itr, _wait_pid, _status);
if(WIFEXITED(_status))
{
printf("exited, status=%d\n", WEXITSTATUS(_status));
}
else if(WIFSIGNALED(_status))
{
printf("killed by signal %d\n", WTERMSIG(_status));
}
else if(WIFSTOPPED(_status))
{
printf("stopped by signal %d\n", WSTOPSIG(_status));
}
else if(WIFCONTINUED(_status))
{
printf("continued\n");
}
else
{
printf("unknown\n");
}
}
}

printf("[%s][%i] joining threads ...\n", _name, getpid());
for(auto& itr : _threads)
itr.join();

omnitrace_user_pop_region("wait_for_children");

printf("[%s][%i] returning (error code: %i) ...\n", _name, getpid(), _status);
return _status;
}

int
main(int argc, char** argv)
{
int _n = 4;
if(argc > 1) _n = std::stoi(argv[1]);
int _nfork = 4;
int _nrep = 1;
if(argc > 1) _nfork = std::stoi(argv[1]);
if(argc > 2) _nrep = std::stoi(argv[2]);

print_info(argv[0]);
return run(argv[0], _n);
for(int i = 0; i < _nrep; ++i)
{
auto _ec = run(argv[0], _nfork);
if(_ec != 0) return _ec;
}

printf("[%s][%i] job complete\n", argv[0], getpid());
return EXIT_SUCCESS;
}
6 changes: 0 additions & 6 deletions source/bin/omnitrace-sample/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,6 @@ get_initial_environment()
auto _mode = get_env<std::string>("OMNITRACE_MODE", "sampling", false);

update_env(_env, "OMNITRACE_USE_SAMPLING", (_mode != "causal"));
update_env(_env, "OMNITRACE_CRITICAL_TRACE", false);
update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", false);

// update_env(_env, "OMNITRACE_USE_PID", false);
// update_env(_env, "OMNITRACE_TIME_OUTPUT", false);
// update_env(_env, "OMNITRACE_OUTPUT_PATH", "omnitrace-output/%tag%/%launch_time%");

#if defined(OMNITRACE_USE_ROCTRACER) || defined(OMNITRACE_USE_ROCPROFILER)
update_env(_env, "HSA_TOOLS_LIB", _dl_libpath);
Expand Down
5 changes: 3 additions & 2 deletions source/bin/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
set(OMNITRACE_ABORT_FAIL_REGEX
"### ERROR ###|address of faulting memory reference|exiting with non-zero exit code|terminate called after throwing an instance|calling abort.. in |Exit code: [1-9]"
CACHE INTERNAL "Regex to catch abnormal exits when a PASS_REGULAR_EXPRESSION is set")
"### ERROR ###|unknown-hash=|address of faulting memory reference|exiting with non-zero exit code|terminate called after throwing an instance|calling abort.. in |Exit code: [1-9]"
CACHE INTERNAL "Regex to catch abnormal exits when a PASS_REGULAR_EXPRESSION is set"
FORCE)

# adds a ctest for executable
function(OMNITRACE_ADD_BIN_TEST)
Expand Down
33 changes: 25 additions & 8 deletions source/lib/common/defines.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@
#define OMNITRACE_HIP_VERSION_MAJOR @OMNITRACE_HIP_VERSION_MAJOR@
#define OMNITRACE_HIP_VERSION_MINOR @OMNITRACE_HIP_VERSION_MINOR@
#define OMNITRACE_HIP_VERSION_PATCH @OMNITRACE_HIP_VERSION_PATCH@

// these can be set via defining the variable in CMake, e.g.:
// cmake -D OMNITRACE_CACHELINE_SIZE=N /path/to/source
// if not defined when configuring cmake, these values fall back to
// default values set in core/containers/aligned_static_vector.hpp.
// the OMNITRACE_CACHELINE_SIZE_MIN is used to ensure portability
#cmakedefine OMNITRACE_CACHELINE_SIZE @OMNITRACE_CACHELINE_SIZE@
#cmakedefine OMNITRACE_CACHELINE_SIZE_MIN @OMNITRACE_CACHELINE_SIZE_MIN@

// misc definitions which can be configured by cmake to override the defaults
#cmakedefine OMNITRACE_ROCM_MAX_COUNTERS @OMNITRACE_ROCM_MAX_COUNTERS@
// clang-format on

#define OMNITRACE_VERSION \
Expand Down Expand Up @@ -87,16 +98,22 @@
#endif
// clang-format on

#if !defined(OMNITRACE_MAX_COUNTERS)
# define OMNITRACE_MAX_COUNTERS 25
#endif

#if !defined(OMNITRACE_ROCM_LOOK_AHEAD)
# define OMNITRACE_ROCM_LOOK_AHEAD 128
// in general, we want to make sure the cache line size is not less than
// 64 bytes (the most common cache line size for x86-64 CPUs). Unless
// OMNITRACE_CACHELINE_SIZE_MIN was explicitly set, the minimum defaults to
// OMNITRACE_CACHELINE_SIZE when that is defined and to 64 otherwise. The
// max value of OMNITRACE_CACHELINE_SIZE and OMNITRACE_CACHELINE_SIZE_MIN
// is then used to ensure that false sharing is well guarded against
#if !defined(OMNITRACE_CACHELINE_SIZE_MIN)
# if defined(OMNITRACE_CACHELINE_SIZE)
# define OMNITRACE_CACHELINE_SIZE_MIN OMNITRACE_CACHELINE_SIZE
# else
# define OMNITRACE_CACHELINE_SIZE_MIN 64
# endif
#endif

#if !defined(OMNITRACE_MAX_ROCM_QUEUES)
# define OMNITRACE_MAX_ROCM_QUEUES OMNITRACE_MAX_THREADS
#if !defined(OMNITRACE_ROCM_MAX_COUNTERS)
# define OMNITRACE_ROCM_MAX_COUNTERS 25
#endif

#define OMNITRACE_ATTRIBUTE(...) __attribute__((__VA_ARGS__))
Expand Down
2 changes: 1 addition & 1 deletion source/lib/core/binary/address_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ address_range::operator+=(address_range _v)
hash_value_t
address_range::hash() const
{
return (is_range()) ? tim::get_combined_hash_id(hash_value_t{ low }, high)
return (is_range()) ? tim::get_hash_id(hash_value_t{ low }, high)
: hash_value_t{ low };
}
} // namespace binary
Expand Down
7 changes: 7 additions & 0 deletions source/lib/core/concepts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ using tim::identity_t; // NOLINT
template <typename Tp>
struct use_placement_new_when_generating_unique_ptr : std::false_type
{};

// Creates a Tp on the heap with `new`, brace-initializing it from the
// perfectly forwarded constructor arguments, and returns the resulting
// pointer wrapped in a unique_ptr_t<Tp>.
//
// Tp   : type of the object to construct
// Args : constructor argument types (deduced)
//
// NOTE(review): because the object is brace-initialized, a Tp that has a
// std::initializer_list constructor will prefer that overload -- confirm
// this is the intended behavior for every Tp used with this helper.
template <typename Tp, typename... Args>
auto
make_unique(Args&&... args)
{
return unique_ptr_t<Tp>{ new Tp{ std::forward<Args>(args)... } };
}
} // namespace omnitrace

namespace tim
Expand Down
3 changes: 1 addition & 2 deletions source/lib/core/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ configure_settings(bool _init)
OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_USE_ROCM_SMI",
"Enable sampling GPU power, temp, utilization, and memory usage", true, "backend",
"rocm_smi", "rocm");
"rocm_smi", "rocm", "process_sampling");

OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_USE_ROCTX",
Expand Down Expand Up @@ -1154,7 +1154,6 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
{
set_default_setting_value("OMNITRACE_USE_SAMPLING", true);
set_default_setting_value("OMNITRACE_USE_PROCESS_SAMPLING", true);
_set("OMNITRACE_CRITICAL_TRACE", false);
}

if(gpu::device_count() == 0)
Expand Down
8 changes: 6 additions & 2 deletions source/lib/core/containers/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#
set(containers_sources)

set(containers_headers ${CMAKE_CURRENT_LIST_DIR}/stable_vector.hpp
${CMAKE_CURRENT_LIST_DIR}/static_vector.hpp)
set(containers_headers
${CMAKE_CURRENT_LIST_DIR}/aligned_static_vector.hpp
${CMAKE_CURRENT_LIST_DIR}/c_array.hpp
${CMAKE_CURRENT_LIST_DIR}/operators.hpp
${CMAKE_CURRENT_LIST_DIR}/stable_vector.hpp
${CMAKE_CURRENT_LIST_DIR}/static_vector.hpp)

target_sources(omnitrace-core-library PRIVATE ${containers_sources} ${containers_headers})
Loading
Loading