Skip to content

Commit

Permalink
Merge branch 'main' into pytest-v2
Browse files (browse the repository at this point in the history)
  • Loading branch information
jrmadsen authored Oct 16, 2023
2 parents 4992b21 + 518c83e commit c853994
Show file tree
Hide file tree
Showing 43 changed files with 1,232 additions and 434 deletions.
2 changes: 1 addition & 1 deletion cmake/BuildSettings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ endif()
#
add_flag_if_avail(
"-W" "-Wall" "-Wno-unknown-pragmas" "-Wno-unused-function" "-Wno-ignored-attributes"
"-Wno-attributes" "-Wno-missing-field-initializers")
"-Wno-attributes" "-Wno-missing-field-initializers" "-Wno-interference-size")

if(OMNITRACE_BUILD_DEBUG)
add_flag_if_avail("-g3" "-fno-omit-frame-pointer")
Expand Down
3 changes: 3 additions & 0 deletions cmake/Formatting.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ if(OMNITRACE_CLANG_FORMAT_EXE
file(GLOB_RECURSE examples ${PROJECT_SOURCE_DIR}/examples/*.cpp
${PROJECT_SOURCE_DIR}/examples/*.c ${PROJECT_SOURCE_DIR}/examples/*.hpp
${PROJECT_SOURCE_DIR}/examples/*.h)
file(GLOB_RECURSE tests_source ${PROJECT_SOURCE_DIR}/tests/source/*.cpp
${PROJECT_SOURCE_DIR}/tests/source/*.hpp)
file(GLOB_RECURSE external ${PROJECT_SOURCE_DIR}/examples/lulesh/external/kokkos/*)
file(
GLOB_RECURSE
Expand All @@ -86,6 +88,7 @@ if(OMNITRACE_CLANG_FORMAT_EXE
add_custom_target(
format-omnitrace-source
${OMNITRACE_CLANG_FORMAT_EXE} -i ${sources} ${headers} ${examples}
${tests_source}
COMMENT "[omnitrace] Running C++ formatter ${OMNITRACE_CLANG_FORMAT_EXE}...")
endif()

Expand Down
102 changes: 72 additions & 30 deletions examples/fork/fork.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <pthread.h>
#include <set>
#include <string>
#include <sys/wait.h>
#include <thread>
Expand All @@ -24,71 +26,111 @@ print_info(const char* _name)
int
run(const char* _name, int nchildren)
{
auto _threads = std::vector<std::thread>{};
auto _barrier = pthread_barrier_t{};
auto _threads = std::vector<std::thread>{};
auto _children = std::vector<pid_t>{};
_children.resize(nchildren, 0);
pthread_barrier_init(&_barrier, nullptr, nchildren + 1);
for(int i = 0; i < nchildren; ++i)
{
omnitrace_user_push_region("launch_child");
auto _run = [i, _name]() {
pid_t _pid = fork();
if(_pid == 0)
auto _run = [&_barrier, &_children, i, _name](uint64_t _nsec) {
pthread_barrier_wait(&_barrier);
_children.at(i) = fork();
if(_children.at(i) == 0)
{
// child code
print_info(_name);
printf("[%s][%i] child job starting...\n", _name, getpid());
auto _sleep = [=]() {
std::this_thread::sleep_for(std::chrono::seconds{ i + 1 });
omnitrace_user_push_region("child_process_child_thread");
std::this_thread::sleep_for(std::chrono::seconds{ _nsec });
omnitrace_user_pop_region("child_process_child_thread");
};
omnitrace_user_push_region("child_process");
std::thread{ _sleep }.join();
omnitrace_user_push_region("child_process");
printf("[%s][%i] child job complete\n", _name, getpid());
exit(EXIT_SUCCESS);
}
else
{
pthread_barrier_wait(&_barrier);
}
};
_threads.emplace_back(_run);
_threads.emplace_back(_run, i + 1);
omnitrace_user_pop_region("launch_child");
}

// all child threads should start executing their fork once this returns
pthread_barrier_wait(&_barrier);
// wait for the threads to successfully fork
pthread_barrier_wait(&_barrier);

omnitrace_user_push_region("wait_for_children");

int _status = 0;
pid_t _wait_pid = 0;
// parent waits for all the child processes
while((_wait_pid = wait(&_status)) > 0)
for(auto& itr : _children)
{
printf("[%s][%i] returned from wait with pid = %i :: ", _name, getpid(),
_wait_pid);
if(WIFEXITED(_status))
{
printf("exited, status=%d\n", WEXITSTATUS(_status));
}
else if(WIFSIGNALED(_status))
{
printf("killed by signal %d\n", WTERMSIG(_status));
}
else if(WIFSTOPPED(_status))
while(itr == 0)
{}
printf("[%s][%i] performing waitpid(%i, ...)\n", _name, getpid(), itr);
while((_wait_pid = waitpid(itr, &_status, WUNTRACED | WNOHANG)) <= 0)
{
printf("stopped by signal %d\n", WSTOPSIG(_status));
}
else if(WIFCONTINUED(_status))
{
printf("continued\n");
}
else
{
printf("unknown\n");
if(_wait_pid == 0) continue;

printf("[%s][%i] returned from waitpid(%i) with pid = %i (status = %i) :: ",
_name, getpid(), itr, _wait_pid, _status);
if(WIFEXITED(_status))
{
printf("exited, status=%d\n", WEXITSTATUS(_status));
}
else if(WIFSIGNALED(_status))
{
printf("killed by signal %d\n", WTERMSIG(_status));
}
else if(WIFSTOPPED(_status))
{
printf("stopped by signal %d\n", WSTOPSIG(_status));
}
else if(WIFCONTINUED(_status))
{
printf("continued\n");
}
else
{
printf("unknown\n");
}
}
}

printf("[%s][%i] joining threads ...\n", _name, getpid());
for(auto& itr : _threads)
itr.join();

omnitrace_user_pop_region("wait_for_children");

printf("[%s][%i] returning (error code: %i) ...\n", _name, getpid(), _status);
return _status;
}

int
main(int argc, char** argv)
{
int _n = 4;
if(argc > 1) _n = std::stoi(argv[1]);
int _nfork = 4;
int _nrep = 1;
if(argc > 1) _nfork = std::stoi(argv[1]);
if(argc > 2) _nrep = std::stoi(argv[2]);

print_info(argv[0]);
return run(argv[0], _n);
for(int i = 0; i < _nrep; ++i)
{
auto _ec = run(argv[0], _nfork);
if(_ec != 0) return _ec;
}

printf("[%s][%i] job complete\n", argv[0], getpid());
return EXIT_SUCCESS;
}
6 changes: 0 additions & 6 deletions source/bin/omnitrace-sample/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,6 @@ get_initial_environment()
auto _mode = get_env<std::string>("OMNITRACE_MODE", "sampling", false);

update_env(_env, "OMNITRACE_USE_SAMPLING", (_mode != "causal"));
update_env(_env, "OMNITRACE_CRITICAL_TRACE", false);
update_env(_env, "OMNITRACE_USE_PROCESS_SAMPLING", false);

// update_env(_env, "OMNITRACE_USE_PID", false);
// update_env(_env, "OMNITRACE_TIME_OUTPUT", false);
// update_env(_env, "OMNITRACE_OUTPUT_PATH", "omnitrace-output/%tag%/%launch_time%");

#if defined(OMNITRACE_USE_ROCTRACER) || defined(OMNITRACE_USE_ROCPROFILER)
update_env(_env, "HSA_TOOLS_LIB", _dl_libpath);
Expand Down
5 changes: 3 additions & 2 deletions source/bin/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
set(OMNITRACE_ABORT_FAIL_REGEX
"### ERROR ###|address of faulting memory reference|exiting with non-zero exit code|terminate called after throwing an instance|calling abort.. in |Exit code: [1-9]"
CACHE INTERNAL "Regex to catch abnormal exits when a PASS_REGULAR_EXPRESSION is set")
"### ERROR ###|unknown-hash=|address of faulting memory reference|exiting with non-zero exit code|terminate called after throwing an instance|calling abort.. in |Exit code: [1-9]"
CACHE INTERNAL "Regex to catch abnormal exits when a PASS_REGULAR_EXPRESSION is set"
FORCE)

# adds a ctest for executable
function(OMNITRACE_ADD_BIN_TEST)
Expand Down
33 changes: 25 additions & 8 deletions source/lib/common/defines.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@
#define OMNITRACE_HIP_VERSION_MAJOR @OMNITRACE_HIP_VERSION_MAJOR@
#define OMNITRACE_HIP_VERSION_MINOR @OMNITRACE_HIP_VERSION_MINOR@
#define OMNITRACE_HIP_VERSION_PATCH @OMNITRACE_HIP_VERSION_PATCH@

// these can be set via defining the variable in CMake, e.g.:
// cmake -D OMNITRACE_CACHELINE_SIZE=N /path/to/source
// if not defined when configuring cmake, these values fall back to
// default values set in core/containers/aligned_static_vector.hpp.
// the OMNITRACE_CACHELINE_SIZE_MIN is used to ensure portability
#cmakedefine OMNITRACE_CACHELINE_SIZE @OMNITRACE_CACHELINE_SIZE@
#cmakedefine OMNITRACE_CACHELINE_SIZE_MIN @OMNITRACE_CACHELINE_SIZE_MIN@

// misc definitions which can be configured by cmake to override the defaults
#cmakedefine OMNITRACE_ROCM_MAX_COUNTERS @OMNITRACE_ROCM_MAX_COUNTERS@
// clang-format on

#define OMNITRACE_VERSION \
Expand Down Expand Up @@ -87,16 +98,22 @@
#endif
// clang-format on

#if !defined(OMNITRACE_MAX_COUNTERS)
# define OMNITRACE_MAX_COUNTERS 25
#endif

#if !defined(OMNITRACE_ROCM_LOOK_AHEAD)
# define OMNITRACE_ROCM_LOOK_AHEAD 128
// in general, we want to make sure the cache line size is not less than
// 64 bytes (the most common cache line size for x86-64 CPUs). So, unless
// OMNITRACE_CACHELINE_SIZE_MIN was explicitly set, the minimum defaults to
// OMNITRACE_CACHELINE_SIZE when that is defined and to 64 otherwise. The
// max value of OMNITRACE_CACHELINE_SIZE and OMNITRACE_CACHELINE_SIZE_MIN
// is then used to ensure that false sharing is well guarded against
#if !defined(OMNITRACE_CACHELINE_SIZE_MIN)
# if defined(OMNITRACE_CACHELINE_SIZE)
# define OMNITRACE_CACHELINE_SIZE_MIN OMNITRACE_CACHELINE_SIZE
# else
# define OMNITRACE_CACHELINE_SIZE_MIN 64
# endif
#endif

#if !defined(OMNITRACE_MAX_ROCM_QUEUES)
# define OMNITRACE_MAX_ROCM_QUEUES OMNITRACE_MAX_THREADS
#if !defined(OMNITRACE_ROCM_MAX_COUNTERS)
# define OMNITRACE_ROCM_MAX_COUNTERS 25
#endif

#define OMNITRACE_ATTRIBUTE(...) __attribute__((__VA_ARGS__))
Expand Down
2 changes: 1 addition & 1 deletion source/lib/core/binary/address_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ address_range::operator+=(address_range _v)
hash_value_t
address_range::hash() const
{
return (is_range()) ? tim::get_combined_hash_id(hash_value_t{ low }, high)
return (is_range()) ? tim::get_hash_id(hash_value_t{ low }, high)
: hash_value_t{ low };
}
} // namespace binary
Expand Down
7 changes: 7 additions & 0 deletions source/lib/core/concepts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ using tim::identity_t; // NOLINT
template <typename Tp>
struct use_placement_new_when_generating_unique_ptr : std::false_type
{};

template <typename Tp, typename... Args>
auto
make_unique(Args&&... args)
{
return unique_ptr_t<Tp>{ new Tp{ std::forward<Args>(args)... } };
}
} // namespace omnitrace

namespace tim
Expand Down
3 changes: 1 addition & 2 deletions source/lib/core/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ configure_settings(bool _init)
OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_USE_ROCM_SMI",
"Enable sampling GPU power, temp, utilization, and memory usage", true, "backend",
"rocm_smi", "rocm");
"rocm_smi", "rocm", "process_sampling");

OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_USE_ROCTX",
Expand Down Expand Up @@ -1154,7 +1154,6 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
{
set_default_setting_value("OMNITRACE_USE_SAMPLING", true);
set_default_setting_value("OMNITRACE_USE_PROCESS_SAMPLING", true);
_set("OMNITRACE_CRITICAL_TRACE", false);
}

if(gpu::device_count() == 0)
Expand Down
8 changes: 6 additions & 2 deletions source/lib/core/containers/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#
set(containers_sources)

set(containers_headers ${CMAKE_CURRENT_LIST_DIR}/stable_vector.hpp
${CMAKE_CURRENT_LIST_DIR}/static_vector.hpp)
set(containers_headers
${CMAKE_CURRENT_LIST_DIR}/aligned_static_vector.hpp
${CMAKE_CURRENT_LIST_DIR}/c_array.hpp
${CMAKE_CURRENT_LIST_DIR}/operators.hpp
${CMAKE_CURRENT_LIST_DIR}/stable_vector.hpp
${CMAKE_CURRENT_LIST_DIR}/static_vector.hpp)

target_sources(omnitrace-core-library PRIVATE ${containers_sources} ${containers_headers})
Loading

0 comments on commit c853994

Please sign in to comment.