
Commit 2cc82f5

Merge remote-tracking branch 'upstream/master' (with hel madgraph5#960, mac madgraph5#974, nvcc madgraph5#966) into june24

Fix conflicts:
	epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
	epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc
	epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc
	epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f
	epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc
	epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc

NB: here I essentially fixed gg_tt.mad, not CODEGEN, which will need to be adjusted a posteriori with a backport

In particular:
- Note1: patch.P1 is now taken from june24, but will need to be recomputed:
    git checkout HEAD CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1
- Note2: I need to manually port some upstream/master changes in auto_dsig1.f to smatrix_multi.f, which did not yet exist
valassi committed Aug 21, 2024
2 parents e552190 + 3f69b26 commit 2cc82f5
Showing 17 changed files with 159 additions and 103 deletions.
@@ -109,9 +109,9 @@ namespace mg5amcCpu
* @param rndcol the pointer to the input random numbers for color selection
* @param channelIds the Feynman diagram to enhance in multi-channel mode if 1 to n
* @param mes the pointer to the output matrix elements
* @param goodHelOnly quit after computing good helicities?
* @param selhel the pointer to the output selected helicities
* @param selcol the pointer to the output selected colors
* @param goodHelOnly quit after computing good helicities?
*/
void gpu_sequence( const FORTRANFPTYPE* momenta,
const FORTRANFPTYPE* gs,
@@ -21,26 +21,24 @@ extern "C"
{
// Now: fortran=-1, cudacpp=0
// Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc...
constexpr unsigned int nimplC = 2;
constexpr unsigned int nimplC = 3;
constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; }
const char* iimplC2TXT( int iimplC )
{
const int iimplF = iimplC - 1;
switch( iimplF )
{
case -1: return "Fortran"; break;
case +0: return "CudaCpp"; break;
case -1: return "Fortran MEs"; break;
case +0: return "CudaCpp MEs"; break;
case +1: return "CudaCpp HEL"; break;
default: assert( false ); break;
}
}

static mgOnGpu::Timer<TIMERTYPE> program_timer;
static float program_totaltime = 0;
static mgOnGpu::Timer<TIMERTYPE> smatrix1_timer;
static float smatrix1_totaltime = 0;
static mgOnGpu::Timer<TIMERTYPE> smatrix1multi_timer[nimplC];
static float smatrix1multi_totaltime[nimplC] = { 0 };
static int smatrix1_counter = 0;
static int smatrix1multi_counter[nimplC] = { 0 };

void counters_initialise_()
@@ -49,19 +47,6 @@ extern "C"
return;
}

void counters_smatrix1_start_()
{
smatrix1_counter++;
smatrix1_timer.Start();
return;
}

void counters_smatrix1_stop_()
{
smatrix1_totaltime += smatrix1_timer.GetDuration();
return;
}

void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt )
{
const unsigned int iimplC = iimplF2C( *iimplF );
@@ -86,13 +71,23 @@ extern "C"
printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime );
printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime );
for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ )
{
if( smatrix1multi_counter[iimplC] > 0 )
printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
iimplC2TXT( iimplC ),
iimplC + 1,
smatrix1multi_totaltime[iimplC],
smatrix1multi_counter[iimplC],
smatrix1multi_counter[iimplC] / ( smatrix1multi_totaltime[iimplC] ) );
{
if( iimplC < nimplC - 1 ) // MEs
printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
iimplC2TXT( iimplC ),
iimplC + 1,
smatrix1multi_totaltime[iimplC],
smatrix1multi_counter[iimplC],
smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] );
else
printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n",
iimplC2TXT( iimplC ),
iimplC + 1,
smatrix1multi_totaltime[iimplC] );
}
}
return;
}
}
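As a usage illustration of the reworked counters above, a minimal standalone C++ driver might look as follows. This is a sketch only: it assumes that counters_smatrix1multi_stop_ and counters_finalise_ exist alongside the two functions visible in this hunk, with the usual trailing-underscore Fortran bindings (only counters_initialise_ and counters_smatrix1multi_start_ appear above, so the other two signatures are assumptions).

#include <initializer_list>
extern "C"
{
  // Declarations of the Fortran-callable counters API (assumed signatures)
  void counters_initialise_();
  void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt );
  void counters_smatrix1multi_stop_( const int* iimplF );
  void counters_finalise_();
}
int main()
{
  const int nevt = 16384;
  counters_initialise_();
  for( int iimplF : { -1, 0, 1 } ) // Fortran MEs, CudaCpp MEs, CudaCpp HEL (iimplF2C adds 1)
  {
    counters_smatrix1multi_start_( &iimplF, &nevt );
    // ... compute MEs (or filter good helicities, for iimplF=+1) for nevt events ...
    counters_smatrix1multi_stop_( &iimplF );
  }
  counters_finalise_(); // prints the [COUNTERS] report shown above
  return 0;
}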
@@ -116,15 +116,31 @@ override CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null))
# Set HIP_HOME from the path to hipcc, if it exists
override HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null))

# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists
# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?)
ifneq ($(CUDA_HOME),)
USE_NVTX ?=-DUSE_NVTX
CUDA_INC = -I$(CUDA_HOME)/include/
# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists (see #965)
ifeq ($(CUDA_HOME),)
# CUDA_HOME is empty (nvcc not found)
override CUDA_INC=
else ifeq ($(wildcard $(CUDA_HOME)/include/),)
# CUDA_HOME is defined (nvcc was found) but $(CUDA_HOME)/include/ does not exist?
override CUDA_INC=
else
CUDA_INC = -I$(CUDA_HOME)/include/
endif
###$(info CUDA_INC=$(CUDA_INC))

# Configure NVTX if a CUDA include directory exists and NVTX headers exist (see #965)
ifeq ($(CUDA_INC),)
# $(CUDA_HOME)/include/ does not exist
override USE_NVTX=
override CUDA_INC=
else ifeq ($(wildcard $(CUDA_HOME)/include/nvtx3/nvToolsExt.h),)
# $(CUDA_HOME)/include/ exists but NVTX headers do not exist?
override USE_NVTX=
else
# $(CUDA_HOME)/include/nvtx.h exists: use NVTX
# (NB: the option to disable NVTX if 'USE_NVTX=' is defined has been removed)
override USE_NVTX=-DUSE_NVTX
endif
###$(info USE_NVTX=$(USE_NVTX))
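As an aside, the -DUSE_NVTX define configured here is consumed on the C++ side roughly as in the following sketch. The TIMER_* macro names are hypothetical, but nvtxRangePushA and nvtxRangePop are the real entry points declared in the nvtx3/nvToolsExt.h header whose existence is checked above.

// NVTX ranges are compiled in only when -DUSE_NVTX is set, i.e. only when
// $(CUDA_HOME)/include/nvtx3/nvToolsExt.h was found by the makefile logic above
#ifdef USE_NVTX
#include "nvtx3/nvToolsExt.h"
#define TIMER_START( name ) nvtxRangePushA( name )
#define TIMER_STOP() nvtxRangePop()
#else
#define TIMER_START( name )
#define TIMER_STOP()
#endif
void computeMatrixElements()
{
  TIMER_START( "MEs" ); // appears as a named range in Nsight timelines
  // ... kernel launches ...
  TIMER_STOP();
}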

# NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024)
# - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP.
@@ -425,13 +441,18 @@ endif
# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...)
ifeq ($(HASCURAND),)
ifeq ($(GPUCC),) # CPU-only build
ifneq ($(CUDA_HOME),)
ifeq ($(CUDA_INC),)
# $(CUDA_HOME)/include/ does not exist (see #965)
override HASCURAND = hasNoCurand
else ifeq ($(wildcard $(CUDA_HOME)/include/curand.h),)
# $(CUDA_HOME)/include/ exists but CURAND headers do not exist? (see #965)
override HASCURAND = hasNoCurand
else
# By default, assume that curand is installed if a CUDA installation exists
override HASCURAND = hasCurand
else
override HASCURAND = hasNoCurand
endif
else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build
# By default, assume that curand is installed if a CUDA build is requested
override HASCURAND = hasCurand
else # non-Nvidia GPU build
override HASCURAND = hasNoCurand
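For reference, the hasCurand/hasNoCurand switch above guards code that uses the CURAND API from the curand.h header being tested. A minimal self-contained sketch of that API follows (host-side generator, illustrative generator type and seed, link with -lcurand); it is not necessarily how the generated code drives CURAND.

#include <curand.h>
#include <cstdio>
#include <vector>
int main()
{
  curandGenerator_t gen;
  // Host-API generator: usable even in CPU-only builds, matching the check above
  curandCreateGeneratorHost( &gen, CURAND_RNG_PSEUDO_MTGP32 );
  curandSetPseudoRandomGeneratorSeed( gen, 20240821 );
  std::vector<double> rnd( 8 );
  curandGenerateUniformDouble( gen, rnd.data(), rnd.size() );
  for( double r : rnd ) printf( "%f\n", r );
  curandDestroyGenerator( gen );
  return 0;
}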
@@ -83,6 +83,7 @@ extern "C"
* @param mes the pointer to the output matrix elements
* @param selhel the pointer to the output selected helicities
* @param selcol the pointer to the output selected colors
* @param goodHelOnly quit after computing good helicities?
*/
void fbridgesequence_( CppObjectInFortran** ppbridge,
const FORTRANFPTYPE* momenta,
@@ -92,18 +93,20 @@ extern "C"
const unsigned int* channelIds,
FORTRANFPTYPE* mes,
int* selhel,
int* selcol )
int* selcol,
const bool* pgoodHelOnly )
{
Bridge<FORTRANFPTYPE>* pbridge = dynamic_cast<Bridge<FORTRANFPTYPE>*>( *ppbridge );
//printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) );
if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" );
#ifdef MGONGPUCPP_GPUIMPL
// Use the device/GPU implementation in the CUDA library
// (there is also a host implementation in this library)
pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol );
pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly );
#else
// Use the host/CPU implementation in the C++ library
// (there is no device implementation in this library)
pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol );
pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, channelIds, mes, selhel, selcol, *pgoodHelOnly );
#endif
}

@@ -119,6 +122,7 @@ extern "C"
* @param mes the pointer to the output matrix elements
* @param selhel the pointer to the output selected helicities
* @param selcol the pointer to the output selected colors
* @param goodHelOnly quit after computing good helicities?
*/
void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge,
const FORTRANFPTYPE* momenta,
@@ -127,9 +131,11 @@ extern "C"
const FORTRANFPTYPE* rndcol,
FORTRANFPTYPE* mes,
int* selhel,
int* selcol )
int* selcol,
const bool* pgoodHelOnly )
{
fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol );
//printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) );
fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly );
}

/**
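To illustrate the new goodHelOnly flag end to end, here is a minimal sketch of the intended two-phase call pattern on the C++ side. The driver function and its buffers are hypothetical; the cpu_sequence signature matches the Bridge calls in the hunk above, with channelIds=nullptr as in the nomultichannel case.

#include "Bridge.h"
using mg5amcCpu::Bridge;
// Hypothetical driver mirroring the FIRST block of smatrix1_multi further below:
// pass 1 only filters good helicities, pass 2 computes the real MEs
void twoPhaseSequence( Bridge<double>& bridge, const double* momenta, const double* gs,
                       const double* rndhel, const double* rndcol,
                       double* mes, int* selhel, int* selcol )
{
  // Phase 1: goodHelOnly=true, quit right after computing good helicities
  bridge.cpu_sequence( momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, true );
  // Phase 2: goodHelOnly=false, compute matrix elements for all events
  bridge.cpu_sequence( momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, false );
}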
@@ -40,10 +40,11 @@ C - CHANID: the input array of channels (Feynman diagrams) to enhance
C - MES: the output matrix element Fortran array
C - SELHEL: the output selected helicity Fortran array
C - SELCOL: the output selected color Fortran array
C - HELONLY: input flag, quit after computing good helicities?
C
INTERFACE
SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS,
& RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL)
& RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY)
INTEGER*8 PBRIDGE
DOUBLE PRECISION MOMENTA(*)
DOUBLE PRECISION GS(*)
@@ -53,6 +54,7 @@ C
DOUBLE PRECISION MES(*)
INTEGER*4 SELHEL(*)
INTEGER*4 SELCOL(*)
LOGICAL HELONLY
END SUBROUTINE FBRIDGESEQUENCE
END INTERFACE

@@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection
C - MES: the output matrix element Fortran array
C - SELHEL: the output selected helicity Fortran array
C - SELCOL: the output selected color Fortran array
C - HELONLY: input flag, quit after computing good helicities?
C
INTERFACE
SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS,
& RNDHEL, RNDCOL, MES, SELHEL, SELCOL)
& RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY)
INTEGER*8 PBRIDGE
DOUBLE PRECISION MOMENTA(*)
DOUBLE PRECISION GS(*)
@@ -78,6 +81,7 @@ C
DOUBLE PRECISION MES(*)
INTEGER*4 SELHEL(*)
INTEGER*4 SELCOL(*)
LOGICAL HELONLY
END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL
END INTERFACE

@@ -63,7 +63,7 @@ PROGRAM FCHECK_SA
GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc)
END DO
CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466
& RNDHEL, RNDCOL, MES, SELHEL, SELCOL)
& RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities
DO IEVT = 1, NEVT
c DO IEXTERNAL = 1, NEXTERNAL
c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL,
1 change: 1 addition & 0 deletions epochX/cudacpp/CODEGEN/generateAndCompare.sh
@@ -337,6 +337,7 @@ function codeGenAndDiff()
| awk -vdate="D:20240301000000+01'00'" '{print gensub("(^/ModDate\\().*(\\)>>endobj$)","\\1"date"\\2","g")}' \
| awk -vdate="D:20240301000000+01'00'" '{print gensub("(^/CreationDate\\().*(\\)$)","\\1"date"\\2","g")}' \
| awk -vid="0123456789abcdef0123456789abcdef" '{print gensub("(^/ID \\[<).*><.*(>\\]$)","\\1"id"><"id"\\2","g")}' \
| awk -vid="0123456789abcdef0123456789abcdef" '{print gensub("(^/ID \\[\\().*\\)\\(.*(\\)\\]$)","\\1"id")("id"\\2","g")}' \
| awk -vdate="2024-03-01T00:00:00+01:00" '{print gensub("(<xmp:ModifyDate>).*(</xmp:ModifyDate>)","\\1"date"\\2","g")}' \
| awk -vdate="2024-03-01T00:00:00+01:00" '{print gensub("(<xmp:CreateDate>).*(</xmp:CreateDate>)","\\1"date"\\2","g")}' \
| awk -vuuid="'uuid=01234567-89ab-cdef-0123-456789abcdef'" '{print gensub("(xapMM:DocumentID=).*(/>$)","\\1"uuid"\\2","g")}' \
2 changes: 1 addition & 1 deletion epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h
@@ -109,9 +109,9 @@ namespace mg5amcCpu
* @param rndcol the pointer to the input random numbers for color selection
* @param channelIds the Feynman diagram to enhance in multi-channel mode if 1 to n
* @param mes the pointer to the output matrix elements
* @param goodHelOnly quit after computing good helicities?
* @param selhel the pointer to the output selected helicities
* @param selcol the pointer to the output selected colors
* @param goodHelOnly quit after computing good helicities?
*/
void gpu_sequence( const FORTRANFPTYPE* momenta,
const FORTRANFPTYPE* gs,
18 changes: 10 additions & 8 deletions epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f
@@ -555,7 +555,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS,

IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2)
#endif
CALL COUNTERS_SMATRIX1MULTI_START( -1, VECSIZE_USED ) ! fortran=-1
call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1
DO IVEC=1, VECSIZE_USED
CALL SMATRIX1(P_MULTI(0,1,IVEC),
& hel_rand(IVEC),
@@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS,
C *START* Included from CUDACPP template smatrix_multi.f
C (into function smatrix$i_multi in auto_dsig$i.f)
C ======================================================
CALL COUNTERS_SMATRIX1MULTI_STOP( -1 ) ! fortran=-1
call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1
#ifdef MG5AMC_MEEXPORTER_CUDACPP
ENDIF

@@ -581,9 +581,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS,
STOP
ENDIF
IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461)
call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1
CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering
& P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2,
& SELECTED_HEL2, SELECTED_COL2 )
& SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities
FIRST = .FALSE.
C ! This is a workaround for
C https://github.com/oliviermattelaer/mg5amc_test/issues/22
@@ -599,22 +600,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS,
ENDIF
WRITE (6,*) 'NGOODHEL =', NGOODHEL
WRITE (6,*) 'NCOMB =', NCOMB
call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1
ENDIF
CALL COUNTERS_SMATRIX1MULTI_START( 0, VECSIZE_USED ) ! cudacpp=0
call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0
IF ( .NOT. MULTI_CHANNEL ) THEN
CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled
& P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2,
& SELECTED_HEL2, SELECTED_COL2 )
& SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities
ELSE
IF( SDE_STRAT.NE.1 ) THEN
WRITE(6,*) 'ERROR ! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy
STOP
ENDIF
CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G,
CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled
& HEL_RAND, COL_RAND, CHANNELS, OUT2,
& SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled
& SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities
ENDIF
CALL COUNTERS_SMATRIX1MULTI_STOP( 0 ) ! cudacpp=0
call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0
ENDIF

IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2)
@@ -63,7 +63,7 @@ PROGRAM FCHECK_SA
GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc)
END DO
CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466
& RNDHEL, RNDCOL, MES, SELHEL, SELCOL)
& RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities
DO IEVT = 1, NEVT
c DO IEXTERNAL = 1, NEXTERNAL
c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL,
5 changes: 2 additions & 3 deletions epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f
@@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
C BEGIN CODE
C ----------

call counters_smatrix1_start()
NTRY(IMIRROR)=NTRY(IMIRROR)+1
THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1
DO I=1,NEXTERNAL
@@ -262,7 +261,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
IHEL = HEL_PICKED
ELSE
ANS = 1D0
call counters_smatrix1_stop()
RETURN
ENDIF
IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN
@@ -307,8 +305,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
ENDIF
ENDIF
ANS=ANS/DBLE(IDEN)

CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL)
call counters_smatrix1_stop()

END

