From 3c879e795a6ac5de61ead7c1de9bbcb8016d2c62 Mon Sep 17 00:00:00 2001 From: Philippe Marguinaud Date: Fri, 31 May 2024 19:21:15 +0000 Subject: [PATCH 1/7] Add NVTX support --- CMakeLists.txt | 15 ++++ cmake/project_summary.cmake | 6 ++ src/fiat/CMakeLists.txt | 14 ++++ src/fiat/drhook/internal/dr_hook_util.F90 | 29 ++++++++ src/fiat/mynvtx/map.cc | 75 +++++++++++++++++++ src/fiat/mynvtx/nvtx.F90 | 54 ++++++++++++++ src/fiat/mynvtx/nvtx.c | 90 +++++++++++++++++++++++ src/fiat/mynvtx/nvtx.h | 10 +++ 8 files changed, 293 insertions(+) create mode 100644 src/fiat/mynvtx/map.cc create mode 100644 src/fiat/mynvtx/nvtx.F90 create mode 100644 src/fiat/mynvtx/nvtx.c create mode 100644 src/fiat/mynvtx/nvtx.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 06e4dae..6903d48 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,21 @@ ecbuild_add_option( FEATURE WARNINGS ecbuild_find_package( NAME Realtime QUIET ) +#### + +if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" ) + + find_package(CUDAToolkit REQUIRED COMPONENTS nvtx3) + find_package(NVHPC REQUIRED COMPONENTS HOSTUTILS) + + find_library(NVTOOLSEXT_LIB NAMES nvToolsExt REQUIRED HINTS ${CUDAToolkit_LIBRARY_DIR}) + find_library(NVHPCWRAPNVTX_LIB NAMES nvhpcwrapnvtx REQUIRED HINTS ${NVHPC_HOSTUTILS_LIBRARY_DIR} ) + +endif() + + + + ### Sources include( fiat_compiler_warnings ) diff --git a/cmake/project_summary.cmake b/cmake/project_summary.cmake index e749115..c58f8b8 100644 --- a/cmake/project_summary.cmake +++ b/cmake/project_summary.cmake @@ -23,5 +23,11 @@ ecbuild_info( "MPI (export MPI_HOME to correct MPI implementation)" ) ecbuild_info( " MPI_Fortran_INCLUDE_DIRS : [${MPI_Fortran_INCLUDE_DIRS}]" ) ecbuild_info( " MPI_Fortran_LIBRARIES : [${MPI_Fortran_LIBRARIES}]" ) ecbuild_info( " MPIEXEC : [${MPIEXEC}]" ) + +if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" ) +ecbuild_info( " nvToolsExt library from CUDAToolkit : [${NVTOOLSEXT_LIB}]" ) 
+ecbuild_info( " nvhpcwrapnvtx library from NVHPC : [${NVHPCWRAPNVTX_LIB}]" ) +endif() + ecbuild_info( "---------------------------------------------------------" ) diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index 6c32956..3104fe8 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -44,6 +44,12 @@ endif() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/library/version.c.in ${CMAKE_CURRENT_BINARY_DIR}/version.c @ONLY ) ecbuild_list_add_pattern( LIST fiat_src GLOB *.c *.F* *.cc ) +if(NOT (CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" )) + # The files in the mynvtx directory are only intended to work with NVHPC + # So don't try to compile them when using another compiler + ecbuild_list_exclude_pattern( LIST fiat_src REGEX mynvtx/* ) +endif() + set( fiat_src ${fiat_src} PARENT_SCOPE ) ecbuild_add_library( TARGET fiat @@ -66,6 +72,14 @@ ecbuild_add_library( TARGET fiat ) +## if compiler is pgi add two libs + +if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" ) + target_link_libraries(fiat PUBLIC ${NVTOOLSEXT_LIB}) + target_link_libraries(fiat PUBLIC ${NVHPCWRAPNVTX_LIB}) + target_include_directories(fiat PRIVATE "${CUDAToolkit_LIBRARY_DIR}/../include") +endif() + if( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" ) # Following should not be necessary; # Probably a bug in the M1 prerelease of gfortran 10.2.0.4 diff --git a/src/fiat/drhook/internal/dr_hook_util.F90 b/src/fiat/drhook/internal/dr_hook_util.F90 index bd07cd5..491b39e 100644 --- a/src/fiat/drhook/internal/dr_hook_util.F90 +++ b/src/fiat/drhook/internal/dr_hook_util.F90 @@ -13,6 +13,10 @@ SUBROUTINE DR_HOOK_UTIL(LDHOOK,CDNAME,KCASE,PKEY,CDFILENAME,KSIZEINFO) USE OML_MOD , ONLY : OML_MY_THREAD USE YOMHOOK , ONLY : LHOOK USE DR_HACK_MOD, ONLY : LL_DRHACK, DR_HACK_INIT, DR_HACK +#ifdef __PGI +USE NVTX +USE MYNVTX +#endif IMPLICIT NONE @@ -33,6 +37,31 @@ SUBROUTINE DR_HOOK_UTIL(LDHOOK,CDNAME,KCASE,PKEY,CDFILENAME,KSIZEINFO) #include 
"dr_hook_init.intfb.h" +#ifdef __PGI +INTEGER, SAVE :: II_DRNVTX = 0 ! 0=no initialized, -1=nvtx off, +1=nvtx on +CHARACTER*32 :: CL_NVTX +#endif + + +#ifdef __PGI +IF (II_DRNVTX == 0) THEN + CALL GETENV ('DR_NVTX', CL_NVTX) + IF (CL_NVTX == '1') THEN + II_DRNVTX = +1 + ELSE + II_DRNVTX = -1 + ENDIF +ENDIF + +IF (II_DRNVTX == 1) THEN + IF (KCASE == 0) THEN + CALL PUSH_RANGE(CDNAME) + ELSEIF (KCASE==1) THEN + CALL POP_RANGE(CDNAME) + ENDIF +ENDIF +#endif + IF (.NOT.LDHOOK) RETURN IMYTID = OML_MY_THREAD() diff --git a/src/fiat/mynvtx/map.cc b/src/fiat/mynvtx/map.cc new file mode 100644 index 0000000..1b3ca9e --- /dev/null +++ b/src/fiat/mynvtx/map.cc @@ -0,0 +1,75 @@ +#include +#include +#include +using namespace std; + +extern "C" double MPI_Wtime (); +#pragma weak MPI_Wtime + +//extern int map_start(const char * str); +//extern void map_stop(); + +struct mystruct { + + int calls = 0; + double elapsed = 0; + double t0 = 0; + +}; + +template +struct my_equal_to : public binary_function<_Tp, _Tp, bool> +{ + bool operator()(const _Tp& __x, const _Tp& __y) const + { return strcmp( __x, __y ) == 0; } +}; + + +struct Hash_Func{ + //BKDR hash algorithm + int operator()(const char * str)const + { + int seed = 131;//31 131 1313 13131131313 etc// + int hash = 0; + while(*str) + { + hash = (hash * seed) + (*str); + str ++; + } + + return hash & (0x7FFFFFFF); + } +}; + +struct mystruct * stack[128]; + +std::unordered_map> map; +static int ilast = 0; +extern "C" +int map_start(const char * str) { + + struct mystruct * elem = &(map[str]); + ilast++; + stack[ilast] = elem; + elem->calls ++; + if (elem->calls >= 11 && elem->elapsed < 0.0001) { + return 0; + } + if (elem->calls > 1) + elem->t0 = MPI_Wtime(); + return 1; +} + +extern "C" +int map_stop() { + + struct mystruct * last = stack[ilast]; + ilast--; + if (last->calls >= 11 && last->elapsed < 0.0001) { + return 0; + } + if (last->calls > 1) + last->elapsed += MPI_Wtime() - last->t0; + return 1; +} + diff --git 
a/src/fiat/mynvtx/nvtx.F90 b/src/fiat/mynvtx/nvtx.F90 new file mode 100644 index 0000000..ad2bdeb --- /dev/null +++ b/src/fiat/mynvtx/nvtx.F90 @@ -0,0 +1,54 @@ + +!!mpif90 nvtx.f90 -Mpreprocess -c -O2 + +#ifndef NVTX_PROFILE +#define NVTX_PROFILE 1 +#endif + +module mynvtx + use iso_c_binding + implicit none + interface + subroutine mynvtxstart(name) + use iso_c_binding + character(kind=c_char,len=*) :: name + end subroutine + subroutine nvtxRangePop() bind(c,name="nvtxRangePop") + end subroutine + end interface + PUBLIC :: PUSH_RANGE + PUBLIC :: POP_RANGE + contains + subroutine PUSH_RANGE(fstr) + character(kind=c_char,len=*), intent(in) :: fstr +#if NVTX_PROFILE != 0 + character(kind=c_char,len=1024) :: cstr + !$omp master + + cstr=trim(fstr)//c_null_char + + call mynvtxstart(cstr) + !$omp end master +#endif + end subroutine PUSH_RANGE + + subroutine POP_RANGE(fstr) + character(kind=c_char,len=*), intent(in) :: fstr +#ifdef NVTX_VERYVERBOSE + character(kind=c_char,len=1024) :: cstr +#endif + !$omp master +#if NVTX_PROFILE != 0 +#ifdef NVTX_VERYVERBOSE + cstr=trim(fstr)//c_null_char + call mynvtxend(cstr) +#else + call mynvtxend() +#endif + +!! 
call nvtxRangePop +#endif +!$omp end master + end subroutine POP_RANGE +end module mynvtx + diff --git a/src/fiat/mynvtx/nvtx.c b/src/fiat/mynvtx/nvtx.c new file mode 100644 index 0000000..800e192 --- /dev/null +++ b/src/fiat/mynvtx/nvtx.c @@ -0,0 +1,90 @@ +#include +#include + +// mpicc -c nvtx.c -O2 + + uint32_t myadler32(const unsigned char *data) + { + const uint32_t MOD_ADLER = 65521; + uint32_t a = 1, b = 0; + size_t index; + for (index = 0; data[index] != 0; ++index) + { + a = (a + data[index]*2) % MOD_ADLER; + b = (b + a) % MOD_ADLER; + } + return (b << 16) | a; + } + extern int map_start(const char * str); +#ifdef NVTX_VERYVERBOSE + const char namestack[256][256]; + int istack=0; +#endif +// int first = 1; +void mynvtxstart_(const char *name) { + if (!map_start(name)) { +#ifdef NVTX_VERYVERBOSE + for(int i=0;i> 12; + b=(color_id & 0x0ff00000) >> 20; + if (r<64 & g<64 & b<64) { + r=r*3; + g=g*3+64; + b=b*4; + } + + color_id = 0xff000000 | (r << 16) | (g << 8) | (b); + nvtxEventAttributes_t eventAttrib = {0}; + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.colorType = NVTX_COLOR_ARGB; + eventAttrib.color = color_id; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = name; +#ifdef NVTX_VERYVERBOSE + for(int i=0;i Date: Fri, 7 Jun 2024 12:31:30 +0000 Subject: [PATCH 2/7] Try nvhpc-24.3 instead of nvhpc-21.9 --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d77c66b..5043331 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: name: - linux gnu-10 - linux clang-12 - - linux nvhpc-21.9 + - linux nvhpc-24.3 - linux intel-classic - macos @@ -64,9 +64,9 @@ jobs: compiler_fc: gfortran-10 caching: true - - name: linux nvhpc-21.9 + - name: linux nvhpc-24.3 os: ubuntu-20.04 - compiler: nvhpc-21.9 + compiler: nvhpc-24.3 
compiler_cc: nvc compiler_cxx: nvc++ compiler_fc: nvfortran From dfad60a9d5970a03d2cf0d0f4eb1f2bfd7e72581 Mon Sep 17 00:00:00 2001 From: Philippe Marguinaud Date: Fri, 7 Jun 2024 13:18:25 +0000 Subject: [PATCH 3/7] Rename mynvtx -> dr_nvtx + cleaning --- src/fiat/CMakeLists.txt | 4 +- src/fiat/dr_nvtx/dr_nvtx.F90 | 52 ++++++++++ src/fiat/dr_nvtx/dr_nvtx.c | 116 ++++++++++++++++++++++ src/fiat/dr_nvtx/dr_nvtx_map.cc | 78 +++++++++++++++ src/fiat/dr_nvtx/dr_nvtx_map.h | 18 ++++ src/fiat/drhook/internal/dr_hook_util.F90 | 6 +- src/fiat/mynvtx/map.cc | 75 -------------- src/fiat/mynvtx/nvtx.F90 | 54 ---------- src/fiat/mynvtx/nvtx.c | 90 ----------------- src/fiat/mynvtx/nvtx.h | 10 -- 10 files changed, 269 insertions(+), 234 deletions(-) create mode 100644 src/fiat/dr_nvtx/dr_nvtx.F90 create mode 100644 src/fiat/dr_nvtx/dr_nvtx.c create mode 100644 src/fiat/dr_nvtx/dr_nvtx_map.cc create mode 100644 src/fiat/dr_nvtx/dr_nvtx_map.h delete mode 100644 src/fiat/mynvtx/map.cc delete mode 100644 src/fiat/mynvtx/nvtx.F90 delete mode 100644 src/fiat/mynvtx/nvtx.c delete mode 100644 src/fiat/mynvtx/nvtx.h diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index 3104fe8..a59fc38 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -45,9 +45,9 @@ configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/library/version.c.in ${CMAKE_CURRENT ecbuild_list_add_pattern( LIST fiat_src GLOB *.c *.F* *.cc ) if(NOT (CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" )) - # The files in the mynvtx directory are only intended to work with NVHPC + # The files in the dr_nvtx directory are only intended to work with NVHPC # So don't try to compile them when using another compiler - ecbuild_list_exclude_pattern( LIST fiat_src REGEX mynvtx/* ) + ecbuild_list_exclude_pattern( LIST fiat_src REGEX dr_nvtx/* ) endif() set( fiat_src ${fiat_src} PARENT_SCOPE ) diff --git a/src/fiat/dr_nvtx/dr_nvtx.F90 b/src/fiat/dr_nvtx/dr_nvtx.F90 new file mode 100644 index 
0000000..bc6e5fb --- /dev/null +++ b/src/fiat/dr_nvtx/dr_nvtx.F90 @@ -0,0 +1,52 @@ +module dr_nvtx + +use iso_c_binding +implicit none + +interface + +subroutine dr_nvtx_start (name) +use iso_c_binding +character(kind=c_char,len=*) :: name +end subroutine + +subroutine dr_nvtx_end (name) +use iso_c_binding +character(kind=c_char,len=*) :: name +end subroutine + +end interface + +public :: dr_nvtx_push_range +public :: dr_nvtx_pop_range + +contains + +subroutine dr_nvtx_push_range (fstr) +character(kind=c_char,len=*), intent(in) :: fstr +character(kind=c_char,len=1024) :: cstr + +!$omp master + +cstr=trim(fstr)//c_null_char +call dr_nvtx_start (cstr) + +!$omp end master + +end subroutine + +subroutine dr_nvtx_pop_range (fstr) +character(kind=c_char,len=*), intent(in) :: fstr +character(kind=c_char,len=1024) :: cstr + +!$omp master + +cstr=trim(fstr)//c_null_char +call dr_nvtx_end (cstr) + +!$omp end master + +end subroutine + +end module dr_nvtx + diff --git a/src/fiat/dr_nvtx/dr_nvtx.c b/src/fiat/dr_nvtx/dr_nvtx.c new file mode 100644 index 0000000..5557b0f --- /dev/null +++ b/src/fiat/dr_nvtx/dr_nvtx.c @@ -0,0 +1,116 @@ +#include +#include +#include + +#include "dr_nvtx_map.h" + +#define INDENT(n) \ +do { \ + int __i; \ + for (int __i = 0; __i < (n); __i++) \ + printf (" "); \ +} while (0) /* run once, so that INDENT(n); expands to a single statement */ + +static uint32_t myadler32 (const unsigned char *data) +{ + const uint32_t MOD_ADLER = 65521; + uint32_t a = 1, b = 0; + size_t index; + + for (index = 0; data[index] != 0; ++index) + { + a = (a + data[index]*2) % MOD_ADLER; + b = (b + a) % MOD_ADLER; + } + + return (b << 16) | a; +} + +#ifdef NVTX_VERYVERBOSE +static char namestack[256][256]; /* filled by strncpy in dr_nvtx_start_, so it must not be const */ +static int istack=0; +#endif + +void dr_nvtx_start_ (const char * name) +{ + if (! 
dr_nvtx_map_start (name)) + { +#ifdef NVTX_VERYVERBOSE + INDENT (istack); + printf ("Skipped open --- %s\n", name); +#endif + return; + } + + int hash = 0; + int color_id = myadler32 ((const unsigned char*)name); + int r,g,b; + + r=color_id & 0x000000ff; + g=(color_id & 0x000ff000) >> 12; + b=(color_id & 0x0ff00000) >> 20; + + if (r<64 & g<64 & b<64) + { + r=r*3; + g=g*3+64; + b=b*4; + } + + color_id = 0xff000000 | (r << 16) | (g << 8) | (b); + + nvtxEventAttributes_t eventAttrib = {0}; + eventAttrib.version = NVTX_VERSION; + eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + eventAttrib.colorType = NVTX_COLOR_ARGB; + eventAttrib.color = color_id; + eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; + eventAttrib.message.ascii = name; + +#ifdef NVTX_VERYVERBOSE + INDENT (istack); + printf ("Opening %s\n", name); +#endif + + nvtxRangePushEx (&eventAttrib); + +#ifdef NVTX_VERYVERBOSE + strncpy (namestack[istack], name, 128); + istack++; +#endif + +} + +void dr_nvtx_end_ (const char * name) +{ + + if (! 
dr_nvtx_map_stop ()) + { +#ifdef NVTX_VERYVERBOSE + INDENT (istack); + printf ("Skipped end --- %s\n",name); +#endif + return; + } + +#ifdef NVTX_VERYVERBOSE + istack--; + if (istack < 0) + { + printf ("NVTX error negative stack\n"); + abort (); + } + + INDENT (istack); + + printf ("Closing %s\n",name); + + if (strcmp (name,namestack[istack])) + { + printf ("Error just closed the wrong marker: %s expected: %s\n",name, namestack[istack]); + abort (); + } +#endif + + nvtxRangePop (); +} diff --git a/src/fiat/dr_nvtx/dr_nvtx_map.cc b/src/fiat/dr_nvtx/dr_nvtx_map.cc new file mode 100644 index 0000000..776a1ef --- /dev/null +++ b/src/fiat/dr_nvtx/dr_nvtx_map.cc @@ -0,0 +1,78 @@ +#include +#include +#include + +#include "dr_nvtx_map.h" + +using namespace std; + +extern "C" double MPI_Wtime (); +#pragma weak MPI_Wtime + +namespace +{ + struct counter + { + int calls = 0; + double elapsed = 0; + double t0 = 0; + }; + + template + struct equal_to : public binary_function<_Tp, _Tp, bool> + { + bool operator()(const _Tp& __x, const _Tp& __y) const + { + return strcmp( __x, __y ) == 0; + } + }; + + + struct hash + { + //BKDR hash algorithm + int operator() (const char * str) const + { + int seed = 131;//31 131 1313 13131131313 etc// + int hash = 0; + while(*str) + { + hash = (hash * seed) + (*str); + str ++; + } + + return hash & (0x7FFFFFFF); + } + }; + + counter * stack[128]; + + std::unordered_map> map; + + int ilast = 0; +}; + +extern "C" int dr_nvtx_map_start (const char * str) +{ + counter * elem = &(map[str]); + ilast++; + stack[ilast] = elem; + elem->calls ++; + if (elem->calls >= 11 && elem->elapsed < 0.0001) + return 0; + if (elem->calls > 1) + elem->t0 = MPI_Wtime(); + return 1; +} + +extern "C" int dr_nvtx_map_stop () +{ + counter * last = stack[ilast]; + ilast--; + if (last->calls >= 11 && last->elapsed < 0.0001) + return 0; + if (last->calls > 1) + last->elapsed += MPI_Wtime() - last->t0; + return 1; +} + diff --git a/src/fiat/dr_nvtx/dr_nvtx_map.h 
b/src/fiat/dr_nvtx/dr_nvtx_map.h new file mode 100644 index 0000000..63b64c6 --- /dev/null +++ b/src/fiat/dr_nvtx/dr_nvtx_map.h @@ -0,0 +1,18 @@ +#ifndef _DR_NVTX_MAP_START +#define _DR_NVTX_MAP_START + +#ifdef __cplusplus +extern "C" +{ +#endif + +int dr_nvtx_map_start (const char * str); +int dr_nvtx_map_stop (); + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/src/fiat/drhook/internal/dr_hook_util.F90 b/src/fiat/drhook/internal/dr_hook_util.F90 index 491b39e..4c33376 100644 --- a/src/fiat/drhook/internal/dr_hook_util.F90 +++ b/src/fiat/drhook/internal/dr_hook_util.F90 @@ -15,7 +15,7 @@ SUBROUTINE DR_HOOK_UTIL(LDHOOK,CDNAME,KCASE,PKEY,CDFILENAME,KSIZEINFO) USE DR_HACK_MOD, ONLY : LL_DRHACK, DR_HACK_INIT, DR_HACK #ifdef __PGI USE NVTX -USE MYNVTX +USE DR_NVTX #endif IMPLICIT NONE @@ -55,9 +55,9 @@ SUBROUTINE DR_HOOK_UTIL(LDHOOK,CDNAME,KCASE,PKEY,CDFILENAME,KSIZEINFO) IF (II_DRNVTX == 1) THEN IF (KCASE == 0) THEN - CALL PUSH_RANGE(CDNAME) + CALL DR_NVTX_PUSH_RANGE (CDNAME) ELSEIF (KCASE==1) THEN - CALL POP_RANGE(CDNAME) + CALL DR_NVTX_POP_RANGE (CDNAME) ENDIF ENDIF #endif diff --git a/src/fiat/mynvtx/map.cc b/src/fiat/mynvtx/map.cc deleted file mode 100644 index 1b3ca9e..0000000 --- a/src/fiat/mynvtx/map.cc +++ /dev/null @@ -1,75 +0,0 @@ -#include -#include -#include -using namespace std; - -extern "C" double MPI_Wtime (); -#pragma weak MPI_Wtime - -//extern int map_start(const char * str); -//extern void map_stop(); - -struct mystruct { - - int calls = 0; - double elapsed = 0; - double t0 = 0; - -}; - -template -struct my_equal_to : public binary_function<_Tp, _Tp, bool> -{ - bool operator()(const _Tp& __x, const _Tp& __y) const - { return strcmp( __x, __y ) == 0; } -}; - - -struct Hash_Func{ - //BKDR hash algorithm - int operator()(const char * str)const - { - int seed = 131;//31 131 1313 13131131313 etc// - int hash = 0; - while(*str) - { - hash = (hash * seed) + (*str); - str ++; - } - - return hash & (0x7FFFFFFF); - } -}; - -struct mystruct * 
stack[128]; - -std::unordered_map> map; -static int ilast = 0; -extern "C" -int map_start(const char * str) { - - struct mystruct * elem = &(map[str]); - ilast++; - stack[ilast] = elem; - elem->calls ++; - if (elem->calls >= 11 && elem->elapsed < 0.0001) { - return 0; - } - if (elem->calls > 1) - elem->t0 = MPI_Wtime(); - return 1; -} - -extern "C" -int map_stop() { - - struct mystruct * last = stack[ilast]; - ilast--; - if (last->calls >= 11 && last->elapsed < 0.0001) { - return 0; - } - if (last->calls > 1) - last->elapsed += MPI_Wtime() - last->t0; - return 1; -} - diff --git a/src/fiat/mynvtx/nvtx.F90 b/src/fiat/mynvtx/nvtx.F90 deleted file mode 100644 index ad2bdeb..0000000 --- a/src/fiat/mynvtx/nvtx.F90 +++ /dev/null @@ -1,54 +0,0 @@ - -!!mpif90 nvtx.f90 -Mpreprocess -c -O2 - -#ifndef NVTX_PROFILE -#define NVTX_PROFILE 1 -#endif - -module mynvtx - use iso_c_binding - implicit none - interface - subroutine mynvtxstart(name) - use iso_c_binding - character(kind=c_char,len=*) :: name - end subroutine - subroutine nvtxRangePop() bind(c,name="nvtxRangePop") - end subroutine - end interface - PUBLIC :: PUSH_RANGE - PUBLIC :: POP_RANGE - contains - subroutine PUSH_RANGE(fstr) - character(kind=c_char,len=*), intent(in) :: fstr -#if NVTX_PROFILE != 0 - character(kind=c_char,len=1024) :: cstr - !$omp master - - cstr=trim(fstr)//c_null_char - - call mynvtxstart(cstr) - !$omp end master -#endif - end subroutine PUSH_RANGE - - subroutine POP_RANGE(fstr) - character(kind=c_char,len=*), intent(in) :: fstr -#ifdef NVTX_VERYVERBOSE - character(kind=c_char,len=1024) :: cstr -#endif - !$omp master -#if NVTX_PROFILE != 0 -#ifdef NVTX_VERYVERBOSE - cstr=trim(fstr)//c_null_char - call mynvtxend(cstr) -#else - call mynvtxend() -#endif - -!! 
call nvtxRangePop -#endif -!$omp end master - end subroutine POP_RANGE -end module mynvtx - diff --git a/src/fiat/mynvtx/nvtx.c b/src/fiat/mynvtx/nvtx.c deleted file mode 100644 index 800e192..0000000 --- a/src/fiat/mynvtx/nvtx.c +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include - -// mpicc -c nvtx.c -O2 - - uint32_t myadler32(const unsigned char *data) - { - const uint32_t MOD_ADLER = 65521; - uint32_t a = 1, b = 0; - size_t index; - for (index = 0; data[index] != 0; ++index) - { - a = (a + data[index]*2) % MOD_ADLER; - b = (b + a) % MOD_ADLER; - } - return (b << 16) | a; - } - extern int map_start(const char * str); -#ifdef NVTX_VERYVERBOSE - const char namestack[256][256]; - int istack=0; -#endif -// int first = 1; -void mynvtxstart_(const char *name) { - if (!map_start(name)) { -#ifdef NVTX_VERYVERBOSE - for(int i=0;i> 12; - b=(color_id & 0x0ff00000) >> 20; - if (r<64 & g<64 & b<64) { - r=r*3; - g=g*3+64; - b=b*4; - } - - color_id = 0xff000000 | (r << 16) | (g << 8) | (b); - nvtxEventAttributes_t eventAttrib = {0}; - eventAttrib.version = NVTX_VERSION; - eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; - eventAttrib.colorType = NVTX_COLOR_ARGB; - eventAttrib.color = color_id; - eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII; - eventAttrib.message.ascii = name; -#ifdef NVTX_VERYVERBOSE - for(int i=0;i Date: Fri, 7 Jun 2024 13:43:21 +0000 Subject: [PATCH 4/7] Make DR_NVTX optional, use HAVE_DR_NVTX instead of __PGI in Fortran/C --- .github/tools/install-nvhpc.sh | 6 +++--- CMakeLists.txt | 12 +++++++++++- src/fiat/CMakeLists.txt | 11 ++++++----- src/fiat/drhook/internal/dr_hook_util.F90 | 6 +++--- src/fiat/{dr_nvtx => drnvtx}/dr_nvtx.F90 | 0 src/fiat/{dr_nvtx => drnvtx}/dr_nvtx.c | 0 src/fiat/{dr_nvtx => drnvtx}/dr_nvtx_map.cc | 0 src/fiat/{dr_nvtx => drnvtx}/dr_nvtx_map.h | 0 8 files changed, 23 insertions(+), 12 deletions(-) rename src/fiat/{dr_nvtx => drnvtx}/dr_nvtx.F90 (100%) rename src/fiat/{dr_nvtx => drnvtx}/dr_nvtx.c (100%) rename src/fiat/{dr_nvtx 
=> drnvtx}/dr_nvtx_map.cc (100%) rename src/fiat/{dr_nvtx => drnvtx}/dr_nvtx_map.h (100%) diff --git a/.github/tools/install-nvhpc.sh b/.github/tools/install-nvhpc.sh index 8a8f332..1d9fa83 100755 --- a/.github/tools/install-nvhpc.sh +++ b/.github/tools/install-nvhpc.sh @@ -12,7 +12,7 @@ # See for # details. -version=21.9 +version=24.3 TEMPORARY_FILES="${TMPDIR:-/tmp}" export NVHPC_INSTALL_DIR=$(pwd)/nvhpc-install @@ -55,8 +55,8 @@ if [ -d "${NVHPC_INSTALL_DIR}" ]; then fi fi -# Example download URL for version 21.9 -# https://developer.download.nvidia.com/hpc-sdk/21.9/nvhpc_2020_219_Linux_x86_64_cuda_11.0.tar.gz +# Example download URL for version 24.3 +# https://developer.download.nvidia.com/hpc-sdk/24.3/nvhpc_2020_219_Linux_x86_64_cuda_11.0.tar.gz ver="$(echo $version | tr -d . )" URL=$(curl -s "https://developer.nvidia.com/nvidia-hpc-sdk-$ver-downloads" | grep -oP "https://developer.download.nvidia.com/hpc-sdk/([0-9]{2}\.[0-9]+)/nvhpc_([0-9]{4})_([0-9]+)_Linux_$(uname -m)_cuda_([0-9\.]+).tar.gz" | sort | tail -1) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6903d48..819663b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,11 +49,21 @@ ecbuild_add_option( FEATURE WARNINGS DEFAULT ON DESCRIPTION "Add warnings to compiler" ) +if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" ) + set (DEFAULT_DR_NVTX ON) +else () + set (DEFAULT_DR_NVTX OFF) +endif () + +ecbuild_add_option( FEATURE DR_NVTX + DEFAULT ${DEFAULT_DR_NVTX} + DESCRIPTION "Add nvtx intrumentation" ) + ecbuild_find_package( NAME Realtime QUIET ) #### -if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" ) +if(HAVE_DR_NVTX) find_package(CUDAToolkit REQUIRED COMPONENTS nvtx3) find_package(NVHPC REQUIRED COMPONENTS HOSTUTILS) diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index a59fc38..6995d54 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -44,10 +44,11 @@ endif() configure_file( 
${CMAKE_CURRENT_SOURCE_DIR}/library/version.c.in ${CMAKE_CURRENT_BINARY_DIR}/version.c @ONLY ) ecbuild_list_add_pattern( LIST fiat_src GLOB *.c *.F* *.cc ) -if(NOT (CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" )) - # The files in the dr_nvtx directory are only intended to work with NVHPC - # So don't try to compile them when using another compiler - ecbuild_list_exclude_pattern( LIST fiat_src REGEX dr_nvtx/* ) + +if( NOT HAVE_DR_NVTX) + # The files in the drnvtx directory are only intended to work with NVHPC + # So don't try to compile them when using another compiler + ecbuild_list_exclude_pattern( LIST fiat_src REGEX drnvtx/* ) endif() set( fiat_src ${fiat_src} PARENT_SCOPE ) @@ -74,7 +75,7 @@ ecbuild_add_library( TARGET fiat ## if compiler is pgi add two libs -if(CMAKE_C_COMPILER_ID STREQUAL "PGI" OR CMAKE_C_COMPILER_ID STREQUAL "NVHPC" ) +if (HAVE_DR_NVTX) target_link_libraries(fiat PUBLIC ${NVTOOLSEXT_LIB}) target_link_libraries(fiat PUBLIC ${NVHPCWRAPNVTX_LIB}) target_include_directories(fiat PRIVATE "${CUDAToolkit_LIBRARY_DIR}/../include") diff --git a/src/fiat/drhook/internal/dr_hook_util.F90 b/src/fiat/drhook/internal/dr_hook_util.F90 index 4c33376..010741e 100644 --- a/src/fiat/drhook/internal/dr_hook_util.F90 +++ b/src/fiat/drhook/internal/dr_hook_util.F90 @@ -13,7 +13,7 @@ SUBROUTINE DR_HOOK_UTIL(LDHOOK,CDNAME,KCASE,PKEY,CDFILENAME,KSIZEINFO) USE OML_MOD , ONLY : OML_MY_THREAD USE YOMHOOK , ONLY : LHOOK USE DR_HACK_MOD, ONLY : LL_DRHACK, DR_HACK_INIT, DR_HACK -#ifdef __PGI +#ifdef HAVE_DR_NVTX USE NVTX USE DR_NVTX #endif @@ -37,13 +37,13 @@ SUBROUTINE DR_HOOK_UTIL(LDHOOK,CDNAME,KCASE,PKEY,CDFILENAME,KSIZEINFO) #include "dr_hook_init.intfb.h" -#ifdef __PGI +#ifdef HAVE_DR_NVTX INTEGER, SAVE :: II_DRNVTX = 0 ! 
0=no initialized, -1=nvtx off, +1=nvtx on CHARACTER*32 :: CL_NVTX #endif -#ifdef __PGI +#ifdef HAVE_DR_NVTX IF (II_DRNVTX == 0) THEN CALL GETENV ('DR_NVTX', CL_NVTX) IF (CL_NVTX == '1') THEN diff --git a/src/fiat/dr_nvtx/dr_nvtx.F90 b/src/fiat/drnvtx/dr_nvtx.F90 similarity index 100% rename from src/fiat/dr_nvtx/dr_nvtx.F90 rename to src/fiat/drnvtx/dr_nvtx.F90 diff --git a/src/fiat/dr_nvtx/dr_nvtx.c b/src/fiat/drnvtx/dr_nvtx.c similarity index 100% rename from src/fiat/dr_nvtx/dr_nvtx.c rename to src/fiat/drnvtx/dr_nvtx.c diff --git a/src/fiat/dr_nvtx/dr_nvtx_map.cc b/src/fiat/drnvtx/dr_nvtx_map.cc similarity index 100% rename from src/fiat/dr_nvtx/dr_nvtx_map.cc rename to src/fiat/drnvtx/dr_nvtx_map.cc diff --git a/src/fiat/dr_nvtx/dr_nvtx_map.h b/src/fiat/drnvtx/dr_nvtx_map.h similarity index 100% rename from src/fiat/dr_nvtx/dr_nvtx_map.h rename to src/fiat/drnvtx/dr_nvtx_map.h From 6a9e9c15c4519ed12a79fc1f836d9ff417d4316d Mon Sep 17 00:00:00 2001 From: Philippe Marguinaud Date: Mon, 10 Jun 2024 13:52:28 +0000 Subject: [PATCH 5/7] Try freeing some disk space --- .github/tools/install-nvhpc.sh | 2 +- .github/workflows/build.yml | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/tools/install-nvhpc.sh b/.github/tools/install-nvhpc.sh index 1d9fa83..ab95783 100755 --- a/.github/tools/install-nvhpc.sh +++ b/.github/tools/install-nvhpc.sh @@ -56,7 +56,7 @@ if [ -d "${NVHPC_INSTALL_DIR}" ]; then fi # Example download URL for version 24.3 -# https://developer.download.nvidia.com/hpc-sdk/24.3/nvhpc_2020_219_Linux_x86_64_cuda_11.0.tar.gz +# https://developer.download.nvidia.com/hpc-sdk/24.3/nvhpc_2024_243_Linux_x86_64_cuda_12.3.tar.gz ver="$(echo $version | tr -d . 
)" URL=$(curl -s "https://developer.nvidia.com/nvidia-hpc-sdk-$ver-downloads" | grep -oP "https://developer.download.nvidia.com/hpc-sdk/([0-9]{2}\.[0-9]+)/nvhpc_([0-9]{4})_([0-9]+)_Linux_$(uname -m)_cuda_([0-9\.]+).tar.gz" | sort | tail -1) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5043331..e078d88 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -120,6 +120,25 @@ jobs: path: ${{ env.DEPS_DIR }} key: deps-${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.build_type }}-${{ env.CACHE_SUFFIX }} + # Free up disk space for nvhpc + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + if: contains( matrix.compiler, 'nvhpc' ) + continue-on-error: true + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: false + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + - name: Install NVHPC compiler if: contains( matrix.compiler, 'nvhpc' ) shell: bash -eux {0} From 5a22f6939083855597d2da7548155c41c647fb87 Mon Sep 17 00:00:00 2001 From: Philippe Marguinaud Date: Wed, 12 Jun 2024 08:46:15 +0000 Subject: [PATCH 6/7] Add test for DR_NVTX --- src/fiat/CMakeLists.txt | 1 + tests/CMakeLists.txt | 3 + tests/drnvtx/CMakeLists.txt | 8 +++ tests/drnvtx/drnvtx_ex1.F90 | 91 ++++++++++++++++++++++++++++ tests/drnvtx/fiat_test_drnvtx_ex1.sh | 17 ++++++ 5 files changed, 120 insertions(+) create mode 100644 tests/drnvtx/CMakeLists.txt create mode 100644 tests/drnvtx/drnvtx_ex1.F90 create mode 100755 tests/drnvtx/fiat_test_drnvtx_ex1.sh diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index f74c1ef..ff22a9b 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -79,6 +79,7 @@ if (HAVE_DR_NVTX) target_link_libraries(fiat PUBLIC ${NVTOOLSEXT_LIB}) 
target_link_libraries(fiat PUBLIC ${NVHPCWRAPNVTX_LIB}) target_include_directories(fiat PRIVATE "${CUDAToolkit_LIBRARY_DIR}/../include") + add_compile_definitions(HAVE_DR_NVTX) endif() if( ${CMAKE_SYSTEM_NAME} MATCHES "Darwin" ) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 22c73c9..95abb4f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -43,6 +43,9 @@ endif() add_subdirectory( drhook ) +if (HAVE_DR_NVTX) + add_subdirectory( drnvtx ) +endif () # ---------------------------------------------------------------------------------------- diff --git a/tests/drnvtx/CMakeLists.txt b/tests/drnvtx/CMakeLists.txt new file mode 100644 index 0000000..19255f6 --- /dev/null +++ b/tests/drnvtx/CMakeLists.txt @@ -0,0 +1,8 @@ +ecbuild_add_executable( TARGET drnvtx_ex1 + SOURCES drnvtx_ex1.F90 + LIBS fiat + LINKER_LANGUAGE Fortran + NOINSTALL ) + +add_test (NAME fiat_test_drnvtx_ex1 COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/fiat_test_drnvtx_ex1.sh) + diff --git a/tests/drnvtx/drnvtx_ex1.F90 b/tests/drnvtx/drnvtx_ex1.F90 new file mode 100644 index 0000000..842d49b --- /dev/null +++ b/tests/drnvtx/drnvtx_ex1.F90 @@ -0,0 +1,91 @@ +! (C) Copyright 2005- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. 
+ +program drnvtx_ex1 + +use yomhook, only : jphook, dr_hook + +implicit none + +real(jphook) :: zhook_handle + +call dr_hook('drnvtx_ex1',0,zhook_handle) + +call sub (3) + +call dr_hook('drnvtx_ex1',1,zhook_handle) + +contains + +recursive subroutine sub (depth) + +integer :: depth + +character(len=128) :: clname +real(jphook) :: zhook_handle + +integer :: i + +if (depth <= 0) return + +do i = 1, len (clname) + clname (i:i) = ' ' +enddo + +do i = 1, 16 + clname (i:i) = char (irand (ichar ('A'), ichar ('Z'))) +enddo + +call dr_hook(clname,0,zhook_handle) + +call sleep (real (irand (10, 200))) + +do i = 1, irand (0, 4) + call sub (depth-1) + call sleep (real (irand (10, 200))) +enddo + +call dr_hook(clname,1,zhook_handle) + +end subroutine + +subroutine sleep (dt) + +implicit none + +real, intent (in) :: dt + +integer, dimension (8) :: t +integer :: s1,s2,ms1,ms2 + +call date_and_time(values=t) +ms1=(t(5)*3600+t(6)*60+t(7))*1000+t(8) + +do + call date_and_time(values=t) + ms2=(t(5)*3600+t(6)*60+t(7))*1000+t(8) + if(ms2-ms1>=dt)exit +enddo + +end subroutine sleep + +integer function irand (k1, k2) + +integer :: k1, k2 + +integer*8, save :: x = 2713 + +x = modulo (16807_8 * x, 2147483647_8) + +irand = k1 + modulo (x, int (k2-k1+1, 8)) + +end function + +end program drnvtx_ex1 + diff --git a/tests/drnvtx/fiat_test_drnvtx_ex1.sh b/tests/drnvtx/fiat_test_drnvtx_ex1.sh new file mode 100755 index 0000000..0475bab --- /dev/null +++ b/tests/drnvtx/fiat_test_drnvtx_ex1.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -xe + +export DR_HOOK=1 +export DR_NVTX=1 + +\rm -f nsys.drnvtx_ex1.qdrep + +nsys profile --force-overwrite true --trace nvtx --kill=none --output=nsys.drnvtx_ex1.qdrep ./drnvtx_ex1 + +ls -lrt + +if [ ! 
-f "nsys.drnvtx_ex1.nsys-rep" ] +then + exit 1 +fi From 4a0656f61c9cf209e3a8e35767192c6e0de1fab6 Mon Sep 17 00:00:00 2001 From: Philippe Marguinaud Date: Wed, 12 Jun 2024 09:01:23 +0000 Subject: [PATCH 7/7] Add test for DR_NVTX --- tests/drnvtx/drnvtx_ex1.F90 | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/drnvtx/drnvtx_ex1.F90 b/tests/drnvtx/drnvtx_ex1.F90 index 842d49b..dc54332 100644 --- a/tests/drnvtx/drnvtx_ex1.F90 +++ b/tests/drnvtx/drnvtx_ex1.F90 @@ -17,7 +17,7 @@ program drnvtx_ex1 call dr_hook('drnvtx_ex1',0,zhook_handle) -call sub (3) +call sub (0) call dr_hook('drnvtx_ex1',1,zhook_handle) @@ -32,7 +32,7 @@ recursive subroutine sub (depth) integer :: i -if (depth <= 0) return +if (depth > irand (2, 4)) return do i = 1, len (clname) clname (i:i) = ' ' @@ -42,12 +42,19 @@ recursive subroutine sub (depth) clname (i:i) = char (irand (ichar ('A'), ichar ('Z'))) enddo +do i = 1, depth + write (*, '(" ")', advance='no') +enddo + +write (*, '(" - ",A)') clname (1:16) + call dr_hook(clname,0,zhook_handle) call sleep (real (irand (10, 200))) -do i = 1, irand (0, 4) - call sub (depth-1) + +do i = 1, irand (1, 4) + call sub (depth+1) call sleep (real (irand (10, 200))) enddo