diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 index b90ef84b47..b64e42a22e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 @@ -1,8 +1,8 @@ diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f -index 4fbb8e6ba..f9e2335de 100644 +index 4fbb8e6ba..d5accb9fb 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f -@@ -484,23 +484,140 @@ C +@@ -484,23 +484,142 @@ C INTEGER VECSIZE_USED INTEGER IVEC @@ -40,7 +40,7 @@ index 4fbb8e6ba..f9e2335de 100644 + + IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) +#endif -+ call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 ++ call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO - DO IVEC=1, VECSIZE_USED @@ -67,7 +67,7 @@ index 4fbb8e6ba..f9e2335de 100644 + ENDDO !$OMP END DO !$OMP END PARALLEL -+ call counters_smatrix1multi_stop( -1 ) ! fortran=-1 ++ call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 +#ifdef MG5AMC_MEEXPORTER_CUDACPP + ENDIF + @@ -77,9 +77,10 @@ index 4fbb8e6ba..f9e2335de 100644 + STOP + ENDIF + IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) ++ call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, -+ & SELECTED_HEL2, SELECTED_COL2 ) ++ & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities + FIRST = .FALSE. +c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) + IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -93,22 +94,23 @@ index 4fbb8e6ba..f9e2335de 100644 + ENDIF + WRITE (6,*) 'NGOODHEL =', NGOODHEL + WRITE (6,*) 'NCOMB =', NCOMB ++ call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 + ENDIF -+ call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 ++ call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 + IF ( .NOT. MULTI_CHANNEL ) THEN + CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled + & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, -+ & SELECTED_HEL2, SELECTED_COL2 ) ++ & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities + ELSE + IF( SDE_STRAT.NE.1 ) THEN + WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy + STOP + ENDIF -+ CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ++ CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled + & HEL_RAND, COL_RAND, CHANNEL, OUT2, -+ & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled ++ & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities + ENDIF -+ call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 ++ call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 + ENDIF + + IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) @@ -284,7 +286,7 @@ index 1124a9164..27a6e4674 100644 open(unit=lun,file=tempname,status='old',ERR=20) fopened=.true. 
diff --git b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f -index e73e654d4..27fbe7302 100644 +index e73e654d4..3072054f2 100644 --- b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -72,7 +72,10 @@ C @@ -299,15 +301,7 @@ index e73e654d4..27fbe7302 100644 C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of -@@ -140,6 +143,7 @@ C ---------- - C BEGIN CODE - C ---------- - -+ call counters_smatrix1_start() - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 - DO I=1,NEXTERNAL -@@ -217,6 +221,17 @@ C ---------- +@@ -217,6 +220,17 @@ C ---------- ENDIF IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) @@ -325,22 +319,3 @@ index e73e654d4..27fbe7302 100644 ENDIF ENDIF ELSE IF (.NOT.INIT_MODE) THEN ! random helicity -@@ -234,6 +249,7 @@ C Include the Jacobian from helicity sampling - IHEL = HEL_PICKED - ELSE - ANS = 1D0 -+ call counters_smatrix1_stop() - RETURN - ENDIF - IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN -@@ -278,9 +294,8 @@ C Set right sign for ANS, based on sign of chosen helicity - ENDIF - ENDIF - ANS=ANS/DBLE(IDEN) -- - CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) -- -+ call counters_smatrix1_stop() - END - - diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f index 5bbeefbb58..fb942500a5 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index 41f46cdb5d..6bb6a1c749 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -353,6 +353,7 @@ function codeGenAndDiff() | awk -vdate="D:20240301000000+01'00'" '{print gensub("(^/ModDate\\().*(\\)>>endobj$)","\\1"date"\\2","g")}' \ | awk -vdate="D:20240301000000+01'00'" '{print gensub("(^/CreationDate\\().*(\\)$)","\\1"date"\\2","g")}' \ | awk -vid="0123456789abcdef0123456789abcdef" '{print gensub("(^/ID \\[<).*><.*(>\\]$)","\\1"id"><"id"\\2","g")}' \ + | awk -vid="0123456789abcdef0123456789abcdef" '{print gensub("(^/ID \\[\\().*\\)\\(.*(\\)\\]$)","\\1"id")("id"\\2","g")}' \ | awk -vdate="2024-03-01T00:00:00+01:00" '{print gensub("().*()","\\1"date"\\2","g")}' \ | awk -vdate="2024-03-01T00:00:00+01:00" '{print gensub("().*()","\\1"date"\\2","g")}' \ | awk -vuuid="'uuid=01234567-89ab-cdef-0123-456789abcdef'" '{print gensub("(xapMM:DocumentID=).*(/>$)","\\1"uuid"\\2","g")}' \ diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index 38978865ff..ef45890e25 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -528,7 +528,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -544,7 +544,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -554,9 +554,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -570,22 +571,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! 
fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index f1b5fc0e1a..c2a8b78ed6 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -267,7 +266,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -312,8 +310,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index f9e2335de4..d5accb9fb2 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! 
fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index 27fbe7302c..3072054f2d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index f9e2335de4..d5accb9fb2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index 27fbe7302c..3072054f2d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index 29cee23b2e..3b6a3f178d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index b13c503fae..1dd3491413 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -159,7 +159,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -265,7 +264,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -310,8 +308,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
-  constexpr unsigned int nimplC = 2;
+  constexpr unsigned int nimplC = 3;
   constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; }
   const char* iimplC2TXT( int iimplC )
   {
     const int iimplF = iimplC - 1;
     switch( iimplF )
     {
-      case -1: return "Fortran"; break;
-      case +0: return "CudaCpp"; break;
+      case -1: return "Fortran MEs"; break;
+      case +0: return "CudaCpp MEs"; break;
+      case +1: return "CudaCpp HEL"; break;
       default: assert( false ); break;
     }
   }
 
   static mgOnGpu::Timer<TIMERTYPE> program_timer;
   static float program_totaltime = 0;
-  static mgOnGpu::Timer<TIMERTYPE> smatrix1_timer;
-  static float smatrix1_totaltime = 0;
   static mgOnGpu::Timer<TIMERTYPE> smatrix1multi_timer[nimplC];
   static float smatrix1multi_totaltime[nimplC] = { 0 };
-  static int smatrix1_counter = 0;
   static int smatrix1multi_counter[nimplC] = { 0 };
 
   void counters_initialise_()
@@ -49,19 +47,6 @@ extern "C"
     return;
   }
 
-  void counters_smatrix1_start_()
-  {
-    smatrix1_counter++;
-    smatrix1_timer.Start();
-    return;
-  }
-
-  void counters_smatrix1_stop_()
-  {
-    smatrix1_totaltime += smatrix1_timer.GetDuration();
-    return;
-  }
-
   void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt )
   {
     const unsigned int iimplC = iimplF2C( *iimplF );
@@ -86,13 +71,23 @@ extern "C"
     printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime );
     printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime );
     for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ )
+    {
       if( smatrix1multi_counter[iimplC] > 0 )
-        printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
-                iimplC2TXT( iimplC ),
-                iimplC + 1,
-                smatrix1multi_totaltime[iimplC],
-                smatrix1multi_counter[iimplC],
-                smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] );
+      {
+        if( iimplC < nimplC - 1 ) // MEs
+          printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
+                  iimplC2TXT( iimplC ),
+                  iimplC + 1,
+                  smatrix1multi_totaltime[iimplC],
+                  smatrix1multi_counter[iimplC],
+                  smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] );
+        else
+          printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n",
+                  iimplC2TXT( iimplC ),
+                  iimplC + 1,
+                  smatrix1multi_totaltime[iimplC] );
+      }
+    }
     return;
   }
 }
diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc
index 8a5b8be9c0..99efcb1dbe 100644
--- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc
+++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.cc
@@ -83,6 +83,7 @@ extern "C"
    * @param mes the pointer to the output matrix elements
    * @param selhel the pointer to the output selected helicities
    * @param selcol the pointer to the output selected colors
+   * @param goodHelOnly quit after computing good helicities?
    */
   void fbridgesequence_( CppObjectInFortran** ppbridge,
                          const FORTRANFPTYPE* momenta,
@@ -92,18 +93,20 @@ extern "C"
                          const unsigned int* pchannelId,
                          FORTRANFPTYPE* mes,
                          int* selhel,
-                         int* selcol )
+                         int* selcol,
+                         const bool* pgoodHelOnly )
   {
     Bridge<FORTRANFPTYPE>* pbridge = dynamic_cast<Bridge<FORTRANFPTYPE>*>( *ppbridge );
+    //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) );
     if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" );
 #ifdef MGONGPUCPP_GPUIMPL
     // Use the device/GPU implementation in the CUDA library
     // (there is also a host implementation in this library)
-    pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol );
+    pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ?
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index c9ca1538d3..1c3ba92e6d 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index 3d035277eb..6fdf8a8d07 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -159,7 +159,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -265,7 +264,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -310,8 +308,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
-  constexpr unsigned int nimplC = 2;
+  constexpr unsigned int nimplC = 3;
   constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; }
   const char* iimplC2TXT( int iimplC )
   {
     const int iimplF = iimplC - 1;
     switch( iimplF )
     {
-      case -1: return "Fortran"; break;
-      case +0: return "CudaCpp"; break;
+      case -1: return "Fortran MEs"; break;
+      case +0: return "CudaCpp MEs"; break;
+      case +1: return "CudaCpp HEL"; break;
       default: assert( false ); break;
     }
   }
 
   static mgOnGpu::Timer<TIMERTYPE> program_timer;
   static float program_totaltime = 0;
-  static mgOnGpu::Timer<TIMERTYPE> smatrix1_timer;
-  static float smatrix1_totaltime = 0;
   static mgOnGpu::Timer<TIMERTYPE> smatrix1multi_timer[nimplC];
   static float smatrix1multi_totaltime[nimplC] = { 0 };
-  static int smatrix1_counter = 0;
   static int smatrix1multi_counter[nimplC] = { 0 };
 
   void counters_initialise_()
@@ -49,19 +47,6 @@ extern "C"
     return;
   }
 
-  void counters_smatrix1_start_()
-  {
-    smatrix1_counter++;
-    smatrix1_timer.Start();
-    return;
-  }
-
-  void counters_smatrix1_stop_()
-  {
-    smatrix1_totaltime += smatrix1_timer.GetDuration();
-    return;
-  }
-
   void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt )
   {
     const unsigned int iimplC = iimplF2C( *iimplF );
@@ -86,13 +71,23 @@ extern "C"
     printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime );
     printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime );
     for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ )
+    {
       if( smatrix1multi_counter[iimplC] > 0 )
-        printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
-                iimplC2TXT( iimplC ),
-                iimplC + 1,
-                smatrix1multi_totaltime[iimplC],
-                smatrix1multi_counter[iimplC],
-                smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] );
+      {
+        if( iimplC < nimplC - 1 ) // MEs
+          printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
+                  iimplC2TXT( iimplC ),
+                  iimplC + 1,
+                  smatrix1multi_totaltime[iimplC],
+                  smatrix1multi_counter[iimplC],
+                  smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] );
+        else
+          printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n",
+                  iimplC2TXT( iimplC ),
+                  iimplC + 1,
+                  smatrix1multi_totaltime[iimplC] );
+      }
+    }
     return;
   }
 }
diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc
index 8a5b8be9c0..99efcb1dbe 100644
--- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc
+++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.cc
@@ -83,6 +83,7 @@ extern "C"
    * @param mes the pointer to the output matrix elements
    * @param selhel the pointer to the output selected helicities
    * @param selcol the pointer to the output selected colors
+   * @param goodHelOnly quit after computing good helicities?
    */
   void fbridgesequence_( CppObjectInFortran** ppbridge,
                          const FORTRANFPTYPE* momenta,
@@ -92,18 +93,20 @@ extern "C"
                          const unsigned int* pchannelId,
                          FORTRANFPTYPE* mes,
                          int* selhel,
-                         int* selcol )
+                         int* selcol,
+                         const bool* pgoodHelOnly )
   {
     Bridge<FORTRANFPTYPE>* pbridge = dynamic_cast<Bridge<FORTRANFPTYPE>*>( *ppbridge );
+    //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) );
     if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" );
 #ifdef MGONGPUCPP_GPUIMPL
     // Use the device/GPU implementation in the CUDA library
    // (there is also a host implementation in this library)
-    pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol );
+    pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ?
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index 208149fcf6..ddc480ec63 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! 
fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index 0413417a30..fdcc390db4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -191,7 +191,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -297,7 +296,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -342,8 +340,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / 
smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index 7c94a0776f..5f55c4daed 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! 
do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f index 7722c3af16..870c890410 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index b9e6d3613f..aefbff4b80 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -255,7 +255,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -361,7 +360,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -406,8 +404,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
-  constexpr unsigned int nimplC = 2;
+  constexpr unsigned int nimplC = 3;
   constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; }
   const char* iimplC2TXT( int iimplC )
   {
     const int iimplF = iimplC - 1;
     switch( iimplF )
     {
-      case -1: return "Fortran"; break;
-      case +0: return "CudaCpp"; break;
+      case -1: return "Fortran MEs"; break;
+      case +0: return "CudaCpp MEs"; break;
+      case +1: return "CudaCpp HEL"; break;
       default: assert( false ); break;
     }
   }
 
   static mgOnGpu::Timer<TIMERTYPE> program_timer;
   static float program_totaltime = 0;
-  static mgOnGpu::Timer<TIMERTYPE> smatrix1_timer;
-  static float smatrix1_totaltime = 0;
   static mgOnGpu::Timer<TIMERTYPE> smatrix1multi_timer[nimplC];
   static float smatrix1multi_totaltime[nimplC] = { 0 };
-  static int smatrix1_counter = 0;
   static int smatrix1multi_counter[nimplC] = { 0 };
 
   void counters_initialise_()
@@ -49,19 +47,6 @@ extern "C"
     return;
   }
 
-  void counters_smatrix1_start_()
-  {
-    smatrix1_counter++;
-    smatrix1_timer.Start();
-    return;
-  }
-
-  void counters_smatrix1_stop_()
-  {
-    smatrix1_totaltime += smatrix1_timer.GetDuration();
-    return;
-  }
-
   void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt )
   {
     const unsigned int iimplC = iimplF2C( *iimplF );
@@ -86,13 +71,23 @@ extern "C"
     printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime );
     printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime );
     for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ )
+    {
       if( smatrix1multi_counter[iimplC] > 0 )
-        printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
-                iimplC2TXT( iimplC ),
-                iimplC + 1,
-                smatrix1multi_totaltime[iimplC],
-                smatrix1multi_counter[iimplC],
-                smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] );
+      {
+        if( iimplC < nimplC - 1 ) // MEs
+          printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
+                  iimplC2TXT( iimplC ),
+                  iimplC + 1,
+                  smatrix1multi_totaltime[iimplC],
+                  smatrix1multi_counter[iimplC],
+                  smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] );
+        else
+          printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n",
+                  iimplC2TXT( iimplC ),
+                  iimplC + 1,
+                  smatrix1multi_totaltime[iimplC] );
+      }
+    }
     return;
   }
 }
diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc
index 8a5b8be9c0..99efcb1dbe 100644
--- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc
+++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.cc
@@ -83,6 +83,7 @@ extern "C"
    * @param mes the pointer to the output matrix elements
    * @param selhel the pointer to the output selected helicities
    * @param selcol the pointer to the output selected colors
+   * @param goodHelOnly quit after computing good helicities?
    */
   void fbridgesequence_( CppObjectInFortran** ppbridge,
                          const FORTRANFPTYPE* momenta,
@@ -92,18 +93,20 @@ extern "C"
                          const unsigned int* pchannelId,
                          FORTRANFPTYPE* mes,
                          int* selhel,
-                         int* selcol )
+                         int* selcol,
+                         const bool* pgoodHelOnly )
   {
     Bridge<FORTRANFPTYPE>* pbridge = dynamic_cast<Bridge<FORTRANFPTYPE>*>( *ppbridge );
+    //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) );
     if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" );
 #ifdef MGONGPUCPP_GPUIMPL
     // Use the device/GPU implementation in the CUDA library
     // (there is also a host implementation in this library)
-    pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol );
+    pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ?
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f index 7722c3af16..870c890410 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 
1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 2c11f53b89..3d7efb5585 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! 
fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! 
do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index b3c4ec75f6..c1fb026c9e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -281,7 +280,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -326,8 +324,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index d829a73049..d65bac7611 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! 
multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index 20ec98ad2f..bbe2b8626e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -281,7 +280,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -326,8 +324,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
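// A compact restatement of the bookkeeping introduced in the counters.cc hunk below, as a
// hedged, self-contained sketch (the mgOnGpu::Timer instances are replaced by plain floats
// here; only the slot mapping and the two printout formats follow the patch):
#include <cassert>
#include <cstdio>
namespace counters_sketch
{
  constexpr unsigned int nimplC = 3;                                   // Fortran MEs, CudaCpp MEs, CudaCpp HEL
  constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } // fortranMEs=-1 -> 0, cudacppMEs=0 -> 1, cudacppHEL=1 -> 2
  const char* iimplC2TXT( unsigned int iimplC )
  {
    const int iimplF = (int)iimplC - 1;
    switch( iimplF )
    {
      case -1: return "Fortran MEs";
      case +0: return "CudaCpp MEs";
      case +1: return "CudaCpp HEL";
      default: assert( false ); return "";
    }
  }
  void finalise_sketch( const float totaltime[nimplC], const int counter[nimplC] )
  {
    for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ )
    {
      if( counter[iimplC] == 0 ) continue;
      if( iimplC < nimplC - 1 ) // ME slots: report a throughput
        printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
                iimplC2TXT( iimplC ), iimplC + 1, totaltime[iimplC], counter[iimplC],
                counter[iimplC] / totaltime[iimplC] );
      else // HEL slot: the one-off helicity filtering pass, time only (an events/s figure would not be meaningful)
        printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n",
                iimplC2TXT( iimplC ), iimplC + 1, totaltime[iimplC] );
    }
  }
}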
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
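// The Fortran SMATRIX1_MULTI changes that follow implement a two-phase use of the bridge:
// a one-off call with HELONLY=.TRUE. that only performs helicity filtering (timed under the
// new cudacppHEL=1 slot and excluded from the ME timers, see #461), then the usual per-batch
// call with HELONLY=.FALSE. (timed under cudacppMEs=0). A hedged C++ sketch of that control
// flow; the function names are illustrative stand-ins for the counters/fbridge entry points
// used in this patch:
namespace driver_sketch
{
  void counters_start( int slot, int nevt ) { (void)slot; (void)nevt; /* stub */ }
  void counters_stop( int slot ) { (void)slot; /* stub */ }
  void bridge_sequence( bool goodHelOnly ) { (void)goodHelOnly; /* stub for FBRIDGESEQUENCE[_NOMULTICHANNEL] */ }
  void smatrix1_multi_sketch( int nevt )
  {
    static bool first = true;
    if( first ) // first pass: helicity filtering only
    {
      counters_start( 1, nevt ); // cudacppHEL=1
      bridge_sequence( true );   // quit after computing good helicities
      counters_stop( 1 );
      first = false;
    }
    counters_start( 0, nevt );   // cudacppMEs=0
    bridge_sequence( false );    // full matrix element computation for this batch
    counters_stop( 0 );
  }
}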
*/ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f index d2b257590d..b8bcf54554 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! 
TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f index 9ae8713f43..5c1baf8703 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/counters.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E 
events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/P1_Sigma_heft_gg_bbx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
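// On the Fortran side the new trailing argument is declared as LOGICAL HELONLY (see the
// fbridge.inc interfaces above); since Fortran passes arguments by reference, the C-linkage
// wrapper below receives it as a pointer and dereferences it once. A minimal hedged sketch
// reduced to the new flag only (the real wrappers take the full momenta/gs/... argument list
// shown in this patch; how LOGICAL and bool line up in memory is compiler-dependent, and this
// pairing is simply the convention the patch relies on):
#include <cstdio>
extern "C" void fbridge_flag_sketch_( const bool* pgoodHelOnly )
{
  const bool goodHelOnly = *pgoodHelOnly; // by-reference Fortran LOGICAL seen as a C++ bool
  std::printf( "goodHelOnly=%d\n", goodHelOnly ? 1 : 0 );
}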
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index 7bd8ec493e..c08c7c485d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! 
do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index b1f45c3af7..a912a12c0f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index c4e476d6c0..868a3ef6c6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! 
exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f index 8d74ac5b98..d30687b866 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f @@ -146,7 +146,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -252,7 +251,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
@@ -297,8 +295,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
 ENDIF
 ENDIF
 ANS=ANS/DBLE(IDEN)
+
 CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL)
- call counters_smatrix1_stop()
+
 END
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f
index c9ca1538d3..1c3ba92e6d 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f
@@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL,
 IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2)
 #endif
- call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1
+ call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1
 !$OMP PARALLEL
 !$OMP DO
 DO IVEC=1, VECSIZE_USED
@@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL,
 ENDDO
 !$OMP END DO
 !$OMP END PARALLEL
- call counters_smatrix1multi_stop( -1 ) ! fortran=-1
+ call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1
 #ifdef MG5AMC_MEEXPORTER_CUDACPP
 ENDIF
@@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL,
 STOP
 ENDIF
 IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461)
+ call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1
 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering
 & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2,
- & SELECTED_HEL2, SELECTED_COL2 )
+ & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities
 FIRST = .FALSE.
 c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486)
 IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all)
@@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL,
 ENDIF
 WRITE (6,*) 'NGOODHEL =', NGOODHEL
 WRITE (6,*) 'NCOMB =', NCOMB
+ call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1
 ENDIF
- call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0
+ call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0
 IF ( .NOT. MULTI_CHANNEL ) THEN
 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled
 & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2,
- & SELECTED_HEL2, SELECTED_COL2 )
+ & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities
 ELSE
 IF( SDE_STRAT.NE.1 ) THEN
 WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy
 STOP
 ENDIF
- CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G,
+ CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled
 & HEL_RAND, COL_RAND, CHANNEL, OUT2,
- & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled
+ & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities
 ENDIF
- call counters_smatrix1multi_stop( 0 ) ! cudacpp=0
+ call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0
 ENDIF
 IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2)
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f
index 9170a32a19..cb7efdfbcf 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/fcheck_sa.f
@@ -63,7 +63,7 @@ PROGRAM FCHECK_SA
 GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc)
 END DO
 CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466
- & RNDHEL, RNDCOL, MES, SELHEL, SELCOL)
+ & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities
 DO IEVT = 1, NEVT
 c DO IEXTERNAL = 1, NEXTERNAL
 c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL,
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f
index 3d035277eb..6fdf8a8d07 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f
@@ -159,7 +159,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
 C BEGIN CODE
 C ----------
- call counters_smatrix1_start()
 NTRY(IMIRROR)=NTRY(IMIRROR)+1
 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1
 DO I=1,NEXTERNAL
@@ -265,7 +264,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
 IHEL = HEL_PICKED
 ELSE
 ANS = 1D0
- call counters_smatrix1_stop()
 RETURN
 ENDIF
 IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN
@@ -310,8 +308,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
 ENDIF
 ENDIF
 ANS=ANS/DBLE(IDEN)
+
 CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL)
- call counters_smatrix1_stop()
+
 END
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f
index 2c11f53b89..3d7efb5585 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f
@@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL,
 IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2)
 #endif
- call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1
+ call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1
 !$OMP PARALLEL
 !$OMP DO
 DO IVEC=1, VECSIZE_USED
@@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL,
 ENDDO
 !$OMP END DO
 !$OMP END PARALLEL
- call counters_smatrix1multi_stop( -1 ) ! fortran=-1
+ call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1
 #ifdef MG5AMC_MEEXPORTER_CUDACPP
 ENDIF
@@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL,
 STOP
 ENDIF
 IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461)
+ call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1
 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering
 & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2,
- & SELECTED_HEL2, SELECTED_COL2 )
+ & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities
 FIRST = .FALSE.
 c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486)
 IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all)
@@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL,
 ENDIF
 WRITE (6,*) 'NGOODHEL =', NGOODHEL
 WRITE (6,*) 'NCOMB =', NCOMB
+ call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1
 ENDIF
- call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0
+ call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0
 IF ( .NOT. MULTI_CHANNEL ) THEN
 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled
 & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2,
- & SELECTED_HEL2, SELECTED_COL2 )
+ & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities
 ELSE
 IF( SDE_STRAT.NE.1 ) THEN
 WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy
 STOP
 ENDIF
- CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G,
+ CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled
 & HEL_RAND, COL_RAND, CHANNEL, OUT2,
- & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled
+ & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities
 ENDIF
- call counters_smatrix1multi_stop( 0 ) ! cudacpp=0
+ call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0
 ENDIF
 IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2)
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f
index 9170a32a19..cb7efdfbcf 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/fcheck_sa.f
@@ -63,7 +63,7 @@ PROGRAM FCHECK_SA
 GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc)
 END DO
 CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466
- & RNDHEL, RNDCOL, MES, SELHEL, SELCOL)
+ & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities
 DO IEVT = 1, NEVT
 c DO IEXTERNAL = 1, NEXTERNAL
 c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL,
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f
index 0a318e1c05..259aaec8a1 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f
@@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
 C BEGIN CODE
 C ----------
- call counters_smatrix1_start()
 NTRY(IMIRROR)=NTRY(IMIRROR)+1
 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1
 DO I=1,NEXTERNAL
@@ -268,7 +267,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
 IHEL = HEL_PICKED
 ELSE
 ANS = 1D0
- call counters_smatrix1_stop()
 RETURN
 ENDIF
 IF (ANS.NE.0D0.AND.(ISUM_HEL .NE.
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -313,8 +311,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index d829a73049..d65bac7611 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index f012b48d83..f85cd82256 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -268,7 +267,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -313,8 +311,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index 0eb22610bf..89f360f028 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f index 9170a32a19..cb7efdfbcf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f index 3d2319b36a..2f6c72fb43 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f @@ -162,7 +162,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -268,7 +267,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -313,8 +311,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 6a17e242b2..85dd15d507 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index 926b17aa45..2d877b9bc0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -191,7 +191,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -297,7 +296,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -342,8 +340,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index a952958df8..0717127ecc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -549,7 +549,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -565,7 +565,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -575,9 +575,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -591,22 +592,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index 520aaec0b1..74f9ed957c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index a41c6f876a..78a109f493 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index f77bfa066c..07469eded9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 700cdbece2..e40cd6c43f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -560,7 +560,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -576,7 +576,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -586,9 +586,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -602,22 +603,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index 4c36b4bcce..a72674b621 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index bc898ac10e..7648cf57b1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -603,7 +603,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -613,9 +613,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -629,22 +630,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index eec298dc6c..1ea1b00778 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -196,7 +196,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -302,7 +301,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -347,8 +345,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 3db88ba2c3..deb87c2e1c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -659,7 +659,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -675,7 +675,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -685,9 +685,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -701,22 +702,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index a530c382f1..62460f03a4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -202,7 +202,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -308,7 +307,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -353,8 +351,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 8988ba6c1d..bd3cb3fcff 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index f6d8294bd3..e4c318e9f7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index 37b6741d5b..ac61617b61 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -659,7 +659,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -675,7 +675,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -685,9 +685,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -701,22 +702,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f index 4b974a1e79..b2be8a2661 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f @@ -202,7 +202,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -308,7 +307,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -353,8 +351,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index 4f5f2bb65a..f0bf648d9b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index 3c33819612..ab5c2f5dcc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index 598e4f55b8..e7b63d08c4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 485ad633d3..db949d4977 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index dd3cd5c8a4..765f218d09 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -603,7 +603,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -613,9 +613,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -629,22 +630,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index 16d80c44b6..f921e966b9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -196,7 +196,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -302,7 +301,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -347,8 +345,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index ef5dde5b56..8284af5cac 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -571,7 +571,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -587,7 +587,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -597,9 +597,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! 
(CppOnly=1 : SMATRIX1 is not called at all) @@ -613,22 +614,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index 5510afb41e..c0df727705 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -194,7 +194,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -300,7 +299,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 
1.OR.HEL_PICKED.EQ.-1)) THEN @@ -345,8 +343,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f index 86efacfe7f..461cfa8224 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! 
do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f index 4d5cb63761..d96ba556c5 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f @@ -191,7 +191,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -297,7 +296,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -342,8 +340,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/counters.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f index 32f6c3207c..6a66bac979 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f index 69a8372b3e..0170f78a25 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! 
do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! (BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f index b1f74c86e4..bfb95cf2ee 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f @@ -131,7 +131,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -237,7 +236,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -282,8 +280,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/counters.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index f9e2335de4..d5accb9fb2 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -516,7 +516,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, IF( FBRIDGE_MODE .LE. 0 ) THEN ! (FortranOnly=0 or BothQuiet=-1 or BothDebug=-2) #endif - call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortran=-1 + call counters_smatrix1multi_start( -1, VECSIZE_USED ) ! fortranMEs=-1 !$OMP PARALLEL !$OMP DO DO IVEC=1, VECSIZE_USED @@ -532,7 +532,7 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDDO !$OMP END DO !$OMP END PARALLEL - call counters_smatrix1multi_stop( -1 ) ! fortran=-1 + call counters_smatrix1multi_stop( -1 ) ! fortranMEs=-1 #ifdef MG5AMC_MEEXPORTER_CUDACPP ENDIF @@ -542,9 +542,10 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, STOP ENDIF IF ( FIRST ) THEN ! exclude first pass (helicity filtering) from timers (#461) + call counters_smatrix1multi_start( 1, VECSIZE_USED ) ! cudacppHEL=1 CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled for helicity filtering & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .TRUE.) ! quit after computing helicities FIRST = .FALSE. c ! This is a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/22 (see PR #486) IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) @@ -558,22 +559,23 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNEL, ENDIF WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB + call counters_smatrix1multi_stop( 1 ) ! cudacppHEL=1 ENDIF - call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacpp=0 + call counters_smatrix1multi_start( 0, VECSIZE_USED ) ! cudacppMEs=0 IF ( .NOT. MULTI_CHANNEL ) THEN CALL FBRIDGESEQUENCE_NOMULTICHANNEL( FBRIDGE_PBRIDGE, ! multi channel disabled & P_MULTI, ALL_G, HEL_RAND, COL_RAND, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ELSE IF( SDE_STRAT.NE.1 ) THEN WRITE(6,*) 'ERROR! The cudacpp bridge requires SDE=1' ! multi channel single-diagram enhancement strategy STOP ENDIF - CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, + CALL FBRIDGESEQUENCE(FBRIDGE_PBRIDGE, P_MULTI, ALL_G, ! multi channel enabled & HEL_RAND, COL_RAND, CHANNEL, OUT2, - & SELECTED_HEL2, SELECTED_COL2 ) ! 1-N: multi channel enabled + & SELECTED_HEL2, SELECTED_COL2, .FALSE.) ! do not quit after computing helicities ENDIF - call counters_smatrix1multi_stop( 0 ) ! cudacpp=0 + call counters_smatrix1multi_stop( 0 ) ! cudacppMEs=0 ENDIF IF( FBRIDGE_MODE .LT. 0 ) THEN ! 
(BothQuiet=-1 or BothDebug=-2) diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index bc79ed4217..aa332cd578 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -143,7 +143,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - call counters_smatrix1_start() NTRY(IMIRROR)=NTRY(IMIRROR)+1 THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 DO I=1,NEXTERNAL @@ -249,7 +248,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, IHEL = HEL_PICKED ELSE ANS = 1D0 - call counters_smatrix1_stop() RETURN ENDIF IF (ANS.NE.0D0.AND.(ISUM_HEL .NE. 1.OR.HEL_PICKED.EQ.-1)) THEN @@ -294,8 +292,9 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, ENDIF ENDIF ANS=ANS/DBLE(IDEN) + CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1, ICOL) - call counters_smatrix1_stop() + END diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/counters.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/counters.cc index 742575a6a5..8ef58cce80 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/counters.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/counters.cc @@ -21,26 +21,24 @@ extern "C" { // Now: fortran=-1, cudacpp=0 // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... 
- constexpr unsigned int nimplC = 2; + constexpr unsigned int nimplC = 3; constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } const char* iimplC2TXT( int iimplC ) { const int iimplF = iimplC - 1; switch( iimplF ) { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; + case -1: return "Fortran MEs"; break; + case +0: return "CudaCpp MEs"; break; + case +1: return "CudaCpp HEL"; break; default: assert( false ); break; } } static mgOnGpu::Timer program_timer; static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; static mgOnGpu::Timer smatrix1multi_timer[nimplC]; static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; static int smatrix1multi_counter[nimplC] = { 0 }; void counters_initialise_() @@ -49,19 +47,6 @@ extern "C" return; } - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) { const unsigned int iimplC = iimplF2C( *iimplF ); @@ -86,13 +71,23 @@ extern "C" printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + { if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + { + if( iimplC < nimplC - 1 ) // MEs + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + else + printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC] ); + } + } return; } } diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? 
*pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h index 4bf2198dd1..60eb101a6a 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/Bridge.h @@ -109,9 +109,9 @@ namespace mg5amcCpu * @param rndcol the pointer to the input random numbers for color selection * @param channelId the Feynman diagram to enhance in multi-channel mode if 1 to n (disable multi-channel if 0) * @param mes the pointer to the output matrix elements - * @param goodHelOnly quit after computing good helicities? 
* @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void gpu_sequence( const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f index 37d586be72..f0220047d7 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/fcheck_sa.f @@ -63,7 +63,7 @@ PROGRAM FCHECK_SA GS(IEVT) = 1.2177157847767195 ! fixed G for aS=0.118 (hardcoded for now in check_sa.cc, fcheck_sa.f, runTest.cc) END DO CALL FBRIDGESEQUENCE_NOMULTICHANNEL(BRIDGE, MOMENTA, GS, ! TEMPORARY? disable multi-channel in fcheck.exe and fgcheck.exe #466 - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, .FALSE.) ! do not quit after computing helicities DO IEVT = 1, NEVT c DO IEXTERNAL = 1, NEXTERNAL c WRITE(6,*) 'MOMENTA', IEVT, IEXTERNAL, diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc index 8a5b8be9c0..99efcb1dbe 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.cc @@ -83,6 +83,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? */ void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -92,18 +93,20 @@ extern "C" const unsigned int* pchannelId, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { Bridge* pbridge = dynamic_cast*>( *ppbridge ); + //printf("fbridgesequence_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); if( pbridge == 0 ) throw std::runtime_error( "fbridgesequence_: invalid Bridge address" ); #ifdef MGONGPUCPP_GPUIMPL // Use the device/GPU implementation in the CUDA library // (there is also a host implementation in this library) - pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->gpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #else // Use the host/CPU implementation in the C++ library // (there is no device implementation in this library) - pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol ); + pbridge->cpu_sequence( momenta, gs, rndhel, rndcol, ( pchannelId ? *pchannelId : 0 ), mes, selhel, selcol, *pgoodHelOnly ); #endif } @@ -119,6 +122,7 @@ extern "C" * @param mes the pointer to the output matrix elements * @param selhel the pointer to the output selected helicities * @param selcol the pointer to the output selected colors + * @param goodHelOnly quit after computing good helicities? 
*/ void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, @@ -127,9 +131,11 @@ extern "C" const FORTRANFPTYPE* rndcol, FORTRANFPTYPE* mes, int* selhel, - int* selcol ) + int* selcol, + const bool* pgoodHelOnly ) { - fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol ); + //printf("fbridgesequence_nomultichannel_ goodHelOnly=%d\n", ( *pgoodHelOnly ? 1 : 0 ) ); + fbridgesequence_( ppbridge, momenta, gs, rndhel, rndcol, nullptr, mes, selhel, selcol, pgoodHelOnly ); } /** diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc index 422aa67cf9..a28622cdb6 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/fbridge.inc @@ -40,10 +40,11 @@ C - CHANID: the input Feynman diagram to enhance in multi-channel mode if 1 to C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? C INTERFACE SUBROUTINE FBRIDGESEQUENCE(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, CHANID, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -53,6 +54,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE END INTERFACE @@ -66,10 +68,11 @@ C - RNDCOL: the input random number Fortran array for color selection C - MES: the output matrix element Fortran array C - SELHEL: the output selected helicity Fortran array C - SELCOL: the output selected color Fortran array +C - HELONLY: input flag, quit after computing good helicities? 
C INTERFACE SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL(PBRIDGE, MOMENTA, GS, - & RNDHEL, RNDCOL, MES, SELHEL, SELCOL) + & RNDHEL, RNDCOL, MES, SELHEL, SELCOL, HELONLY) INTEGER*8 PBRIDGE DOUBLE PRECISION MOMENTA(*) DOUBLE PRECISION GS(*) @@ -78,6 +81,7 @@ C DOUBLE PRECISION MES(*) INTEGER*4 SELHEL(*) INTEGER*4 SELCOL(*) + LOGICAL HELONLY END SUBROUTINE FBRIDGESEQUENCE_NOMULTICHANNEL END INTERFACE diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index ccecc02825..e2dbe75af6 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
@@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:16:14 +DATE: 2024-08-08_17:15:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8426s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7989s - [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8040s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7617s + [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3952s - [COUNTERS] Fortran MEs ( 1 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.8386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3588s - [COUNTERS] Fortran MEs ( 1 ) : 0.4798s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7676s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3130s + [COUNTERS] Fortran MEs ( 1 ) : 0.4546s for 90112 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4218s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3779s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.9929s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4771s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5158s for 90112 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 
1.7975s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4842s for 90112 events => throughput is 1.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.819871e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882626e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.838165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.884788e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4133s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3741s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6401s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2894s for 90112 events => throughput is 3.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5827s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2743s for 90112 events => throughput is 3.28E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.138027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.394268e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.282175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310645e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4390s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4228s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0162s for 8192 events => throughput is 5.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3922s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3762s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.25E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5071s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3263s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1808s for 90112 events => throughput is 4.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4801s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3120s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1677s for 90112 events => throughput is 5.37E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.258519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.229487e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.342811e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.317415e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4034s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3888s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3908s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3762s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1623s for 90112 events => throughput is 5.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5331s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1648s for 90112 events => throughput is 5.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.840330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.766382e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.907730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.801267e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3956s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4023s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0226s for 8192 events => throughput is 3.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4701s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2380s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2321s for 90112 events => throughput is 3.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5410s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3024s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2380s for 90112 events => throughput is 3.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.585624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.832096e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.722456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.818192e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8009s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8181s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8166s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.26E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7129s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7060s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.31E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7439s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.914318e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060445e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.613535e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.634268e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.870641e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.134087e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.085338e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082840e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.804787e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.185634e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159663e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.159507e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.702330e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.211421e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.067580e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.085335e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***
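
The counters.cc hunks above drop the per-call SMATRIX1 timer and extend the smatrix1multi arrays from two to three slots, with the new last slot timing the one-off CudaCpp helicity-filtering pass and being reported without a throughput figure. The following standalone C++ sketch is illustrative only (it is not the repository's counters.cc, and the totals and event counts are made-up values in the spirit of the logs above); it shows the reporting logic the patch introduces: ME slots print time plus throughput, while the final HEL slot prints time only.

```cpp
// Minimal sketch of the three-slot counter report introduced by the patch.
// Slot 0 = "Fortran MEs", slot 1 = "CudaCpp MEs", slot 2 = "CudaCpp HEL"
// (helicity filtering: reported without a throughput, as in the logs above).
#include <cassert>
#include <cstdio>

constexpr unsigned int nimplC = 3;

const char* iimplC2TXT( unsigned int iimplC )
{
  switch( iimplC )
  {
    case 0: return "Fortran MEs";
    case 1: return "CudaCpp MEs";
    case 2: return "CudaCpp HEL";
    default: assert( false ); return "";
  }
}

int main()
{
  // Illustrative accumulated times (seconds) and event counts only
  float totaltime[nimplC] = { 0.0437f, 0.0460f, 0.0005f };
  int counter[nimplC] = { 8192, 8192, 1 };
  for( unsigned int i = 0; i < nimplC; i++ )
  {
    if( counter[i] <= 0 ) continue;
    if( i < nimplC - 1 ) // ME slots: report time and throughput
      printf( " [COUNTERS] %11s ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n",
              iimplC2TXT( i ), (int)( i + 1 ), totaltime[i], counter[i], counter[i] / totaltime[i] );
    else // HEL slot: report the time of the single helicity-filtering pass only
      printf( " [COUNTERS] %11s ( %1d ) : %9.4fs\n", iimplC2TXT( i ), (int)( i + 1 ), totaltime[i] );
  }
  return 0;
}
```

Keeping the HEL slot separate matches the intent of issue #461 referenced above: the first bridge call only filters good helicities, so folding its cost into the ME throughput would distort the events/s numbers reported in the tmad logs.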