diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index b8847b7cb6..01107f564b 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:24 +DATE: 2024-08-08_20:42:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7267s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7180s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6950s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6868s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1693s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.07E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2837s - [COUNTERS] Fortran MEs ( 1 ) : 0.0868s for 90112 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s + [COUNTERS] Fortran MEs ( 1 ) : 0.0835s for 90112 events => throughput is 1.08E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1777s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 90112 events => throughput is 1.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 90112 events => throughput is 1.18E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.167196e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.181726e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165900e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1799s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1755s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3337s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2882s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 90112 events => throughput is 1.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3353s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 90112 events => throughput is 1.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.930086e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.918558e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984749e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.023579e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1789s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1750s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3219s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 90112 events => throughput is 2.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3295s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2928s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 90112 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539184e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.640473e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.709927e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.831088e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1781s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3264s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2932s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 90112 events => throughput is 2.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3209s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2867s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 90112 events => throughput is 2.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.635946e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.678759e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.766552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813366e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1731s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0038s for 8192 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1736s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1692s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3421s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3007s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 90112 events => throughput is 2.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3322s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0405s for 90112 events => throughput is 2.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.124836e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.108602e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.174846e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.253882e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6131s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6126s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.49E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6096s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.32E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7351s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.79E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7166s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.749639e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.377977e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.937376e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.939853e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.647566e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.088090e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.462558e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.478718e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.641748e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.243737e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.002002e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.989285e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.659137e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.238682e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.136180e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.131222e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 587bb76d73..617aae1ec8 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,17 +1,17 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppavx2 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:40 +DATE: 2024-08-08_20:43:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7166s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7259s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7175s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1878s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1797s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s - [COUNTERS] Fortran MEs ( 1 ) : 0.0933s for 90112 events => throughput is 9.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3875s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3018s + [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1981s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1911s + [COUNTERS] PROGRAM TOTAL : 0.1866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1794s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3643s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 90112 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3784s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0761s for 90112 events => throughput is 1.18E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.200646e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.232262e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.237867e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234403e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2910s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2977s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0297s for 90112 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.187376e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119755e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.220665e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.282267e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.62E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2872s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 90112 events => throughput is 3.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3317s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3038s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0276s for 90112 events => throughput is 3.26E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.488004e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.481016e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.612737e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.570800e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.35E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2873s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 90112 events => throughput is 3.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3314s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.33E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.702442e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.644439e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.804759e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697078e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1768s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1743s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3213s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2952s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 90112 events => throughput is 3.45E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3372s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.323941e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.387501e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761942e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.616268e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5917s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5912s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6084s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6073s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.92E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7292s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7238s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.86E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122217e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.601368e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.609665e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.718163e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.582586e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.633474e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.902910e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898384e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.573732e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.829286e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.085324e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.104797e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.048658e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.012752e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.737473e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.802072e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index f580fe1044..e51bbf394d 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:57 +DATE: 2024-08-08_20:43:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6825s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6742s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6983s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6906s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,8 +83,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s + [COUNTERS] PROGRAM TOTAL : 0.1791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1711s [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2832s - [COUNTERS] Fortran MEs ( 1 ) : 0.0875s for 90112 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2869s + [COUNTERS] Fortran MEs ( 1 ) : 0.0825s for 90112 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3744s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0787s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3660s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2865s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0792s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.113565e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.124575e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.142626e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154252e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1728s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3372s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2926s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0446s for 90112 events => throughput is 2.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3336s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 90112 events => throughput is 1.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964919e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982594e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.051752e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052848e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1771s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.46E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1749s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3444s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0371s for 90112 events => throughput is 2.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3282s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 90112 events => throughput is 2.51E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534941e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.552156e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.720296e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.649390e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1827s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1744s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3391s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0359s for 90112 events => throughput is 2.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3217s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2876s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0337s for 90112 events => throughput is 2.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.692433e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.650509e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.750694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.719714e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.36E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3483s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3066s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0416s for 90112 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3264s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2866s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 90112 events => throughput is 2.28E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.207219e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.257287e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.300574e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5936s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5931s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5980s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.38E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7142s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7101s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.80E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.699826e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.054665e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.973881e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970842e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.642059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.242307e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.500425e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.491734e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.034916e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.221256e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.050339e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.104459e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.807602e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.208981e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156854e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.160987e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index e2dbe75af6..8d24f348d7 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_17:15:28 +DATE: 2024-08-08_20:43:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8040s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7617s - [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8083s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7667s + [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3777s + [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7676s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3130s - [COUNTERS] Fortran MEs ( 1 ) : 0.4546s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7491s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2980s + [COUNTERS] Fortran MEs ( 1 ) : 0.4511s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4218s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3779s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4196s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,10 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7975s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4842s for 90112 events => throughput is 1.86E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.7813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4811s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.882626e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.879822e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.884788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903748e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -211,9 +211,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3992s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3741s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4107s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 8192 events => throughput is 3.28E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -245,9 +245,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5827s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2743s for 90112 events => throughput is 3.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3004s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2709s for 90112 events => throughput is 3.33E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.394268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310019e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.310645e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.203674e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -289,9 +289,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3922s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3762s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3916s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -323,9 +323,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4801s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3120s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1677s for 90112 events => throughput is 5.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4759s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3059s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1696s for 90112 events => throughput is 5.31E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.229487e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.223657e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.317415e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.200982e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -367,9 +367,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3762s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -401,9 +401,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5331s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1648s for 90112 events => throughput is 5.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4542s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1516s for 90112 events => throughput is 5.94E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.766382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.865744e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.801267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.035557e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -445,9 +445,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4023s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0226s for 8192 events => throughput is 3.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4098s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 8192 events => throughput is 3.43E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -479,9 +479,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5410s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3024s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2380s for 90112 events => throughput is 3.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5428s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 90112 events => throughput is 3.78E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.832096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.669812e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.818192e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.898434e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -523,9 +523,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8181s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8166s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.26E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.27E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -557,9 +557,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7439s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7365s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7231s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -573,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.060445e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.008892e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.634268e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.654647e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.134087e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.331472e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082840e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082448e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.185634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310542e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159507e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160861e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.211421e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.331806e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.085335e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.063253e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index f46b75eef7..420861126b 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:16:41 +DATE: 2024-08-08_20:44:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7783s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7371s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8019s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7604s + [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s - [COUNTERS] Fortran MEs ( 1 ) : 0.0407s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4215s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3800s + [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6995s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2512s - [COUNTERS] Fortran MEs ( 1 ) : 0.4483s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7567s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3058s + [COUNTERS] Fortran MEs ( 1 ) : 0.4510s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094179692708323] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3790s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688388783328] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6901s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4430s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7678s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4582s for 90112 events => throughput is 1.97E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.030986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984608e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.985412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996032e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094175707109216] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 8192 events => throughput is 4.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3923s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3751s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684583433771] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4062s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2257s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1804s for 90112 events => throughput is 4.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4893s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3053s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1837s for 90112 events => throughput is 4.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.724911e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.831484e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.762208e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.765454e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3873s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3779s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 8192 events => throughput is 8.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3134s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0960s for 90112 events => throughput is 9.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4091s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3116s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0972s for 90112 events => throughput is 9.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.106025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.995090e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.235160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.148417e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3894s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3807s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 90112 events => throughput is 9.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3961s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0917s for 90112 events => throughput is 9.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.917882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.994646e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.709952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.882184e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094178448427996] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3747s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3637s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0110s for 8192 events => throughput is 7.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3945s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688391432061] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3429s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1229s for 90112 events => throughput is 7.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5017s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3657s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1356s for 90112 events => throughput is 6.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.804453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.837763e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.929498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925566e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184162782994] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7863s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8112s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105694501043516] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6604s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6548s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 90112 events => throughput is 1.61E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7829s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 90112 events => throughput is 1.58E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.908595e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.085941e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.266019e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.178660e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.049207e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.983696e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.416476e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.406286e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.062460e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.010543e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.551978e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536473e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.413712e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.527299e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.519394e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.475317e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index dc1bcf4827..65f004f30e 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -3,20 +3,20 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:17:06 +DATE: 2024-08-08_20:44:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7372s - [COUNTERS] Fortran MEs ( 1 ) : 0.0407s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8115s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7704s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3534s - [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4214s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3805s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7153s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2615s - [COUNTERS] Fortran MEs ( 1 ) : 0.4538s for 90112 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7670s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3128s + [COUNTERS] Fortran MEs ( 1 ) : 0.4542s for 90112 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4374s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3938s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4777s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7889s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3008s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4877s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.883629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863098e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.895894e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.876650e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4011s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3795s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4880s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2602s for 90112 events => throughput is 3.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5750s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2680s for 90112 events => throughput is 3.36E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.317423e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.334875e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.373113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372227e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3732s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3946s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3794s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.56E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3817s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2190s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1627s for 90112 events => throughput is 5.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4696s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1658s for 90112 events => throughput is 5.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.391740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.223051e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.381923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.767945e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4019s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3634s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2153s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1482s for 90112 events => throughput is 6.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4595s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3077s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1514s for 90112 events => throughput is 5.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.907882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.889622e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.901678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.919078e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4002s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3783s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4596s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2305s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2291s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5451s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3093s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2354s for 90112 events => throughput is 3.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.830144e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737875e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.799392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.863403e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8029s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8014s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.26E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6477s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6414s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7390s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.955290e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.004360e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.610728e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.618155e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.721513e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.337805e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.057949e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064726e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.718962e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.321717e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127927e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.141622e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.720504e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.487761e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.981768e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.948699e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index daad34ef63..c52a8af2f9 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:17:32 +DATE: 2024-08-08_20:45:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6619s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3441s - [COUNTERS] Fortran MEs ( 1 ) : 0.3178s for 8192 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6887s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3666s + [COUNTERS] Fortran MEs ( 1 ) : 0.3221s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6512s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s - [COUNTERS] Fortran MEs ( 1 ) : 0.3303s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6558s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3350s + [COUNTERS] Fortran MEs ( 1 ) : 0.3208s for 8192 events => throughput is 2.55E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.0566s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4770s - [COUNTERS] Fortran MEs ( 1 ) : 3.5796s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1103s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5412s + [COUNTERS] Fortran MEs ( 1 ) : 3.5692s for 90112 events => throughput is 2.52E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6762s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3380s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3370s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.4802s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8183s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6619s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2687s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5495s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7180s for 90112 events => throughput is 2.42E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.542070e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.517328e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.548855e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.477316e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1750s for 8192 events => throughput is 4.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5207s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3399s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1801s for 8192 events => throughput is 4.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.5748s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6476s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9272s for 90112 events => throughput is 4.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4936s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5370s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9559s for 90112 events => throughput is 4.61E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.742087e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.723167e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.785084e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.710741e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4868s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0864s for 8192 events => throughput is 9.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4289s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3383s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0899s for 8192 events => throughput is 9.11E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5034s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9448s for 90112 events => throughput is 9.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5415s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5644s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9765s for 90112 events => throughput is 9.23E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.300762e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.063994e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.558512e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.113779e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0769s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4521s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3684s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0831s for 8192 events => throughput is 9.86E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.4089s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5590s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8498s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4440s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5615s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8818s for 90112 events => throughput is 1.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.073418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056563e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.076553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.066565e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5274s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4198s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1076s for 8192 events => throughput is 7.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3385s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1106s for 8192 events => throughput is 7.41E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8167s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6114s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2053s for 90112 events => throughput is 7.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7606s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5479s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2119s for 90112 events => throughput is 7.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.600195e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.524660e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.737653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.502357e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7530s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8444s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8355s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9540s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9311s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9827s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.86E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632787e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.637288e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.212630e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.243124e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415553e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002014e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.240897e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.239487e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002136e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.251024e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.250655e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.408414e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001900e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.766536e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.746731e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 51c84bcce7..b25cff31e4 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:18:15 +DATE: 2024-08-08_20:45:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s - [COUNTERS] Fortran MEs ( 1 ) : 0.3203s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6879s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3658s + [COUNTERS] Fortran MEs ( 1 ) : 0.3221s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6540s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3233s - [COUNTERS] Fortran MEs ( 1 ) : 0.3307s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6575s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3322s + [COUNTERS] Fortran MEs ( 1 ) : 0.3252s for 8192 events => throughput is 2.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.0535s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4792s - [COUNTERS] Fortran MEs ( 1 ) : 3.5744s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0903s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5245s + [COUNTERS] Fortran MEs ( 1 ) : 3.5658s for 90112 events => throughput is 2.53E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112722616246457] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9529s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6280s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3249s for 8192 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6630s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3273s for 8192 events => throughput is 2.50E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238468293717765E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.7771s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9289s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8482s for 90112 events => throughput is 2.34E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1318s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5454s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5854s for 90112 events => throughput is 2.51E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.295265e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.562809e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.274937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.549301e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112720694019242] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5768s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1114s for 8192 events => throughput is 7.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4414s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0997s for 8192 events => throughput is 8.22E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238454783817719E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.9013s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7217s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1796s for 90112 events => throughput is 7.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6571s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5548s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1018s for 90112 events => throughput is 8.18E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.824875e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.333170e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.970843e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.397937e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0476s for 8192 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3825s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 8192 events => throughput is 1.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.2132s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6840s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5293s for 90112 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0649s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5077s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.559003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821951e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.455805e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.834362e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0477s for 8192 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3381s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.1232s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4802s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0303s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4587s for 90112 events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.558817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018262e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.780926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019326e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112723389095883] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4670s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4037s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0633s for 8192 events => throughput is 1.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0549s for 8192 events => throughput is 1.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238464413054557E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.3110s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6631s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6480s for 90112 events => throughput is 1.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1189s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5295s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5889s for 90112 events => throughput is 1.53E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.404960e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.561264e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.473914e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545662e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112725654777677] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7573s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7564s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7590s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7568s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238470908598507E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9333s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9227s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9627s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9510s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0105s for 90112 events => throughput is 8.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.129185e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151184e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.549775e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.548948e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539449e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.576425e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.726001e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.715469e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.546076e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.585156e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761871e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.753005e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.365545e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.440113e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.281294e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.293588e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index b3a8db7192..b6592dfe65 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - - -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:18:54 +DATE: 2024-08-08_20:46:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6896s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3548s - [COUNTERS] Fortran MEs ( 1 ) : 0.3348s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s + [COUNTERS] Fortran MEs ( 1 ) : 0.3227s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6450s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3225s - [COUNTERS] Fortran MEs ( 1 ) : 0.3225s for 8192 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6641s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3385s + [COUNTERS] Fortran MEs ( 1 ) : 0.3256s for 8192 events => throughput is 2.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.1061s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5000s - [COUNTERS] Fortran MEs ( 1 ) : 3.6061s for 90112 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1698s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5570s + [COUNTERS] Fortran MEs ( 1 ) : 3.6128s for 90112 events => throughput is 2.49E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6448s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3444s for 8192 events => throughput is 2.38E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3338s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3416s for 8192 events => throughput is 2.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.6402s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8430s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7972s for 90112 events => throughput is 2.37E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3154s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5455s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7687s for 90112 events => throughput is 2.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.504502e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.463950e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.493736e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.478616e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1730s for 8192 events => throughput is 4.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1749s for 8192 events => throughput is 4.68E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.5611s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6531s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9081s for 90112 events => throughput is 4.72E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4746s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5384s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9354s for 90112 events => throughput is 4.66E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.885906e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.832626e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.454856e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.815562e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5422s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0965s for 8192 events => throughput is 8.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4266s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3394s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0865s for 8192 events => throughput is 9.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8283s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7674s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0609s for 90112 events => throughput is 8.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4911s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5269s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9636s for 90112 events => throughput is 9.35E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.150394e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.435081e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.706600e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.477580e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5467s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0932s for 8192 events => throughput is 8.79E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4142s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5615s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6701s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8914s for 90112 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3905s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5342s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8556s for 90112 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.036553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.087061e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088736e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4623s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1226s for 8192 events => throughput is 6.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4463s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3356s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1100s for 8192 events => throughput is 7.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8307s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6135s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2171s for 90112 events => throughput is 7.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7724s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5419s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2297s for 90112 events => throughput is 7.33E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.036325e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.268797e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.017098e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.343356e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7682s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.36E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9432s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9204s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9875s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9612s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.86E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628045e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.654166e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.092999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.808330e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.285885e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001990e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.235467e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235577e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.248772e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000218e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.246161e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.245999e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266042e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.996930e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.742147e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.726284e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index a3214916d8..9f965c04b5 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -4,8 +4,8 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,14 +13,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:19:38 +DATE: 2024-08-08_20:47:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2416s - [COUNTERS] Fortran MEs ( 1 ) : 4.1390s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2657s + [COUNTERS] Fortran MEs ( 1 ) : 4.2511s for 8192 events => throughput is 1.93E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4016s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2413s - [COUNTERS] Fortran MEs ( 1 ) : 4.1603s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4866s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2643s + [COUNTERS] Fortran MEs ( 1 ) : 4.2223s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.5196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7600s - [COUNTERS] Fortran MEs ( 1 ) : 45.7596s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.4461s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8443s + [COUNTERS] Fortran MEs ( 1 ) : 46.6018s for 90112 events => throughput is 1.93E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.8038s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4979s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3060s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6404s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2618s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3690s for 8192 events => throughput is 1.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0096s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 53.4475s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9535s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.4939s for 90112 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 49.9380s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7954s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.1336s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922216e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.926413e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.959395e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935484e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.7615s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4839s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2777s for 8192 events => throughput is 3.60E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6125s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2606s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3472s for 8192 events => throughput is 3.49E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0046s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099785] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 30.6075s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0936s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5139s for 90112 events => throughput is 3.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.5257s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8027s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7180s for 90112 events => throughput is 3.50E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.496612e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.649842e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.539346e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.636818e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.2183s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9937s for 8192 events => throughput is 8.24E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.2653s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2598s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0030s for 8192 events => throughput is 8.17E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 14.0676s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7785s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.2891s for 90112 events => throughput is 7.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.8598s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7908s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.0665s for 90112 events => throughput is 8.14E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.419384e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.344831e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.678823e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.416676e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.9878s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1062s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8816s for 8192 events => throughput is 9.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.1673s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2599s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9051s for 8192 events => throughput is 9.05E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 12.3962s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6208s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7754s for 90112 events => throughput is 9.22E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.7872s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8132s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.9717s for 90112 events => throughput is 9.04E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.529916e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.472083e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.344439e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.534343e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.4488s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3506s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0982s for 8192 events => throughput is 7.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.3936s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2589s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1317s for 8192 events => throughput is 7.24E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 15.0103s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8885s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.1218s for 90112 events => throughput is 7.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.2691s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8171s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.4493s for 90112 events => throughput is 7.24E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.477284e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.935643e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.503375e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.348983e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222225] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7764s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0326s for 8192 events => throughput is 2.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7693s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6983s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 8192 events => throughput is 2.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099782] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.6460s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3642s for 90112 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6062s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3669s for 90112 events => throughput is 2.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0344s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.283334e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.290486e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.506901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.506388e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.117711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.134196e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183664e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177921e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.114395e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.129278e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.181079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.155764e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.111113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126990e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.444874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.446377e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 565fe287ce..cd633f37c7 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:23:54 +DATE: 2024-08-08_20:51:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2420s - [COUNTERS] Fortran MEs ( 1 ) : 4.1560s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4959s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2635s + [COUNTERS] Fortran MEs ( 1 ) : 4.2323s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4132s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2372s - [COUNTERS] Fortran MEs ( 1 ) : 4.1760s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2631s + [COUNTERS] Fortran MEs ( 1 ) : 4.2156s for 8192 events => throughput is 1.94E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.3387s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7498s - [COUNTERS] Fortran MEs ( 1 ) : 45.5889s for 90112 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.4352s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8357s + [COUNTERS] Fortran MEs ( 1 ) : 46.5995s for 90112 events => throughput is 1.93E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320716615478996] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.4803s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3064s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1739s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5354s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2660s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2605s for 8192 events => throughput is 1.92E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558162567940870] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 52.0102s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8677s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.1425s for 90112 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.5468s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7982s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.7401s for 90112 events => throughput is 1.93E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015479e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996945e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.013117e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982014e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320708851010073] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.5680s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4066s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1614s for 8192 events => throughput is 7.05E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.4573s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2634s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1914s for 8192 events => throughput is 6.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558157380141428] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 15.5194s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8862s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.6332s for 90112 events => throughput is 7.13E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.6570s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7854s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8693s for 90112 events => throughput is 7.00E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.320236e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.255598e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.204448e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.246435e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.2285s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7290s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4995s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7739s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2587s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5137s for 8192 events => throughput is 1.59E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 7.7643s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2713s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4931s for 90112 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.4672s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7991s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.6666s for 90112 events => throughput is 1.59E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.674256e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.606140e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.684230e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.576957e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.1706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6998s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4708s for 8192 events => throughput is 1.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7680s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2709s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4957s for 8192 events => throughput is 1.65E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 7.1094s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2054s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9040s for 90112 events => throughput is 1.84E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.7809s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7804s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9992s for 90112 events => throughput is 1.80E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.876397e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.849666e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.833562e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.858554e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320713685871445] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.4404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8351s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6053s for 8192 events => throughput is 1.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8187s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2599s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5571s for 8192 events => throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558162184774774] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 8.3122s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3041s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.0081s for 90112 events => throughput is 1.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.9104s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7899s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1190s for 90112 events => throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.515129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.496224e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.524953e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.504281e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320719394836651] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7253s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7396s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6908s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0242s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558167135091578] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.4757s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2273s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2484s for 90112 events => throughput is 3.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4680s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1917s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2521s for 90112 events => throughput is 3.57E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0241s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.375249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382988e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.741898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.717142e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.119355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.139748e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.307435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.304954e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.151709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.085623e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.303009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.300454e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.049512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.130448e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.396866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.397157e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 93675b1fbf..27512be658 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,9 +1,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:27:14 +DATE: 2024-08-08_20:54:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2409s - [COUNTERS] Fortran MEs ( 1 ) : 4.1327s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4700s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2619s + [COUNTERS] Fortran MEs ( 1 ) : 4.2081s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.3735s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2382s - [COUNTERS] Fortran MEs ( 1 ) : 4.1353s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4683s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s + [COUNTERS] Fortran MEs ( 1 ) : 4.2079s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.4052s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7410s - [COUNTERS] Fortran MEs ( 1 ) : 45.6641s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.3196s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8366s + [COUNTERS] Fortran MEs ( 1 ) : 46.4830s for 90112 events => throughput is 1.94E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556893412546] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.8075s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4671s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3405s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6760s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2586s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4088s for 8192 events => throughput is 1.86E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083370546855] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 53.8850s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9787s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.9063s for 90112 events => throughput is 1.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 50.5724s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8031s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.7604s for 90112 events => throughput is 1.85E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.948359e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.909521e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.951349e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.899981e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556780656974] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.7620s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4646s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2974s for 8192 events => throughput is 3.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5687s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2576s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3063s for 8192 events => throughput is 3.55E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083390630859] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 29.1849s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0005s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.1844s for 90112 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 27.4318s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7915s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.6356s for 90112 events => throughput is 3.52E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.700037e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.646364e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.594428e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.634455e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.2008s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9900s for 8192 events => throughput is 8.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.2686s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0056s for 8192 events => throughput is 8.15E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 13.6123s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7227s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.8896s for 90112 events => throughput is 8.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.9032s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7920s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.1088s for 90112 events => throughput is 8.11E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.547846e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.153831e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.522651e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.410165e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.9517s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0869s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8648s for 8192 events => throughput is 9.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.1480s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8850s for 8192 events => throughput is 9.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 12.1428s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5992s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5436s for 90112 events => throughput is 9.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 11.5478s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7830s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.7625s for 90112 events => throughput is 9.23E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.818655e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.509937e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.752589e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.503575e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.4530s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3428s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1102s for 8192 events => throughput is 7.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.3881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2592s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1259s for 8192 events => throughput is 7.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 16.4311s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0180s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.4132s for 90112 events => throughput is 6.72E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.4378s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7995s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.6355s for 90112 events => throughput is 7.13E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.834157e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.378664e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943776e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.252552e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556665261842] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7919s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7589s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0360s for 8192 events => throughput is 2.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0343s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083224243403] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.7451s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3632s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5943s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1940s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3660s for 90112 events => throughput is 2.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0343s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.281497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292672e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.508846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.513091e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.120326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.132768e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.151465e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.108754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.134281e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.177596e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121179e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.130147e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.453826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.451952e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index a3ac7b5c1f..dab5f736a0 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -3,17 +3,17 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make USEBUILDDIR=1 BACKEND=cpp512y - make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:32:46 +DATE: 2024-08-08_20:59:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.2642s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5189s - [COUNTERS] Fortran MEs ( 1 ) : 100.7453s for 8192 events => throughput is 8.13E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.0811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5116s + [COUNTERS] Fortran MEs ( 1 ) : 101.5694s for 8192 events => throughput is 8.07E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 98.9549s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5002s - [COUNTERS] Fortran MEs ( 1 ) : 98.4548s for 8192 events => throughput is 8.32E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.0739s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5163s + [COUNTERS] Fortran MEs ( 1 ) : 101.5576s for 8192 events => throughput is 8.07E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1078.9193s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3729s - [COUNTERS] Fortran MEs ( 1 ) : 1074.5464s for 90112 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1120.7697s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3745s + [COUNTERS] Fortran MEs ( 1 ) : 1116.3951s for 90112 events => throughput is 8.07E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 226.9636s - [COUNTERS] Fortran Overhead ( 0 ) : 104.1252s - [COUNTERS] CudaCpp MEs ( 2 ) : 122.8384s for 8192 events => throughput is 6.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 122.6268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5175s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.9186s for 8192 events => throughput is 6.72E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1907s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1453.2216s - [COUNTERS] Fortran Overhead ( 0 ) : 108.8256s - [COUNTERS] CudaCpp MEs ( 2 ) : 1344.3960s for 90112 events => throughput is 6.70E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1388.7153s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3988s + [COUNTERS] CudaCpp MEs ( 2 ) : 1384.1234s for 90112 events => throughput is 6.51E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1931s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.957500e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.880201e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.966466e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.389775e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 110.0381s - [COUNTERS] Fortran Overhead ( 0 ) : 50.3711s - [COUNTERS] CudaCpp MEs ( 2 ) : 59.6670s for 8192 events => throughput is 1.37E+02 events/s + [COUNTERS] PROGRAM TOTAL : 60.8180s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5182s + [COUNTERS] CudaCpp MEs ( 2 ) : 60.1993s for 8192 events => throughput is 1.36E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 713.5237s - [COUNTERS] Fortran Overhead ( 0 ) : 54.3392s - [COUNTERS] CudaCpp MEs ( 2 ) : 659.1845s for 90112 events => throughput is 1.37E+02 events/s + [COUNTERS] PROGRAM TOTAL : 663.6261s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4076s + [COUNTERS] CudaCpp MEs ( 2 ) : 659.1171s for 90112 events => throughput is 1.37E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1014s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.640648e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.603881e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.642221e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.607115e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 51.6707s - [COUNTERS] Fortran Overhead ( 0 ) : 23.8142s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.8565s for 8192 events => throughput is 2.94E+02 events/s + [COUNTERS] PROGRAM TOTAL : 28.7968s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5160s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.2344s for 8192 events => throughput is 2.90E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0464s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 333.1028s - [COUNTERS] Fortran Overhead ( 0 ) : 27.4583s - [COUNTERS] CudaCpp MEs ( 2 ) : 305.6444s for 90112 events => throughput is 2.95E+02 events/s + [COUNTERS] PROGRAM TOTAL : 314.6312s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4324s + [COUNTERS] CudaCpp MEs ( 2 ) : 310.1525s for 90112 events => throughput is 2.91E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0464s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.542437e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.378917e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.558618e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.496128e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 45.2154s - [COUNTERS] Fortran Overhead ( 0 ) : 20.7378s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4776s for 8192 events => throughput is 3.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.3254s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5203s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.7644s for 8192 events => throughput is 3.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0408s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 293.9503s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5760s - [COUNTERS] CudaCpp MEs ( 2 ) : 269.3742s for 90112 events => throughput is 3.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 277.9808s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4083s + [COUNTERS] CudaCpp MEs ( 2 ) : 273.5305s for 90112 events => throughput is 3.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0420s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.038194e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.986386e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.054728e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.006448e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 46.6506s - [COUNTERS] Fortran Overhead ( 0 ) : 22.6220s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.0285s for 8192 events => throughput is 3.41E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.0869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5172s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.5238s for 8192 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0459s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 291.3320s - [COUNTERS] Fortran Overhead ( 0 ) : 26.5806s - [COUNTERS] CudaCpp MEs ( 2 ) : 264.7513s for 90112 events => throughput is 3.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 271.0840s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3948s + [COUNTERS] CudaCpp MEs ( 2 ) : 266.6404s for 90112 events => throughput is 3.38E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0489s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.666698e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.641160e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.681846e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.622116e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 4.2374s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1562s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0812s for 8192 events => throughput is 7.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.2426s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0583s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0970s for 8192 events => throughput is 7.47E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0873s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 18.7608s - [COUNTERS] Fortran Overhead ( 0 ) : 6.8530s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9078s for 90112 events => throughput is 7.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.9203s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9107s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9249s for 90112 events => throughput is 7.56E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0847s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.502460e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.521131e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.309158e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.292650e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.222869e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.241733e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.577320e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.585186e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.238055e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.235154e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.437122e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.473644e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.224755e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.236111e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.231636e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235762e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a539e33f24..4ffdbee10a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_23:01:56 +DATE: 2024-08-08_22:23:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 97.1861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4946s - [COUNTERS] Fortran MEs ( 1 ) : 96.6914s for 8192 events => throughput is 8.47E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.3873s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5075s + [COUNTERS] Fortran MEs ( 1 ) : 100.8798s for 8192 events => throughput is 8.12E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.7329s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4973s - [COUNTERS] Fortran MEs ( 1 ) : 96.2355s for 8192 events => throughput is 8.51E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.2416s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5135s + [COUNTERS] Fortran MEs ( 1 ) : 101.7281s for 8192 events => throughput is 8.05E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1066.1378s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3607s - [COUNTERS] Fortran MEs ( 1 ) : 1061.7771s for 90112 events => throughput is 8.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1114.7300s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3428s + [COUNTERS] Fortran MEs ( 1 ) : 1110.3872s for 90112 events => throughput is 8.12E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719945779552E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 204.5576s - [COUNTERS] Fortran Overhead ( 0 ) : 94.7222s - [COUNTERS] CudaCpp MEs ( 2 ) : 109.8354s for 8192 events => throughput is 7.46E+01 events/s + [COUNTERS] PROGRAM TOTAL : 111.0089s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5100s + [COUNTERS] CudaCpp MEs ( 2 ) : 110.3187s for 8192 events => throughput is 7.43E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1802s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -168,9 +169,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326290777570335E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1302.1844s - [COUNTERS] Fortran Overhead ( 0 ) : 96.3064s - [COUNTERS] CudaCpp MEs ( 2 ) : 1205.8781s for 90112 events => throughput is 7.47E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1216.8479s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4035s + [COUNTERS] CudaCpp MEs ( 2 ) : 1212.2644s for 90112 events => throughput is 7.43E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1800s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.892403e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.795452e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.896902e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.783118e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,9 +214,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716994349971E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 50.4740s - [COUNTERS] Fortran Overhead ( 0 ) : 23.8797s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5944s for 8192 events => throughput is 3.08E+02 events/s + [COUNTERS] PROGRAM TOTAL : 27.4750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5164s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.9120s for 8192 events => throughput is 3.04E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0465s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -246,9 +249,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326284885505778E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 322.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 27.6823s - [COUNTERS] CudaCpp MEs ( 2 ) : 294.4978s for 90112 events => throughput is 3.06E+02 events/s + [COUNTERS] PROGRAM TOTAL : 300.8248s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4082s + [COUNTERS] CudaCpp MEs ( 2 ) : 296.3700s for 90112 events => throughput is 3.04E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0466s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518666e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.485944e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.484203e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.470723e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -290,9 +294,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 26.5623s - [COUNTERS] Fortran Overhead ( 0 ) : 12.2724s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.2899s for 8192 events => throughput is 5.73E+02 events/s + [COUNTERS] PROGRAM TOTAL : 14.5936s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5183s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.0522s for 8192 events => throughput is 5.83E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0231s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -324,9 +329,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 174.5012s - [COUNTERS] Fortran Overhead ( 0 ) : 16.5646s - [COUNTERS] CudaCpp MEs ( 2 ) : 157.9366s for 90112 events => throughput is 5.71E+02 events/s + [COUNTERS] PROGRAM TOTAL : 158.5014s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4348s + [COUNTERS] CudaCpp MEs ( 2 ) : 154.0430s for 90112 events => throughput is 5.85E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0236s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.701727e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.991558e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.718879e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.952358e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -368,9 +374,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 23.9391s - [COUNTERS] Fortran Overhead ( 0 ) : 11.1292s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8099s for 8192 events => throughput is 6.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 12.8606s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5199s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.3203s for 8192 events => throughput is 6.65E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0204s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -402,9 +409,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 155.8533s - [COUNTERS] Fortran Overhead ( 0 ) : 14.9625s - [COUNTERS] CudaCpp MEs ( 2 ) : 140.8908s for 90112 events => throughput is 6.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 139.5398s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3981s + [COUNTERS] CudaCpp MEs ( 2 ) : 135.1212s for 90112 events => throughput is 6.67E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0205s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.716591e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.890802e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.738216e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.069181e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -446,9 +454,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719257109645E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.2840s - [COUNTERS] Fortran Overhead ( 0 ) : 12.4664s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8176s for 8192 events => throughput is 6.39E+02 events/s + [COUNTERS] PROGRAM TOTAL : 12.8130s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5166s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.2739s for 8192 events => throughput is 6.67E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0225s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -480,9 +489,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326283665697276E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 149.3053s - [COUNTERS] Fortran Overhead ( 0 ) : 16.1390s - [COUNTERS] CudaCpp MEs ( 2 ) : 133.1664s for 90112 events => throughput is 6.77E+02 events/s + [COUNTERS] PROGRAM TOTAL : 139.5916s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4260s + [COUNTERS] CudaCpp MEs ( 2 ) : 135.1428s for 90112 events => throughput is 6.67E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0228s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.340832e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.223008e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.256949e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.135239e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -523,9 +533,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405721007137020E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.5997s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0687s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5310s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.1089s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0215s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5332s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5542s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -556,9 +567,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326295421688232E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 11.4660s - [COUNTERS] Fortran Overhead ( 0 ) : 5.7555s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7106s for 90112 events => throughput is 1.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 11.2844s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8851s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8421s for 90112 events => throughput is 1.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5572s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -571,42 +583,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.530700e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.533878e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.545413e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547825e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.163666e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.147653e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.148478e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.124611e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.113035e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.134315e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.205309e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.131039e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.129848e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.139642e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.016707e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.021489e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 78332de82a..e8248fddca 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cuda - +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -24,15 +24,15 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-06-29_00:09:11 +DATE: 2024-08-08_23:26:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.7022s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4938s - [COUNTERS] Fortran MEs ( 1 ) : 96.2084s for 8192 events => throughput is 8.51E+01 events/s + [COUNTERS] PROGRAM TOTAL : 103.0122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5139s + [COUNTERS] Fortran MEs ( 1 ) : 102.4983s for 8192 events => throughput is 7.99E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.6669s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5001s - [COUNTERS] Fortran MEs ( 1 ) : 96.1668s for 8192 events => throughput is 8.52E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.2993s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5294s + [COUNTERS] Fortran MEs ( 1 ) : 100.7699s for 8192 events => throughput is 8.13E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1066.3936s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3326s - [COUNTERS] Fortran MEs ( 1 ) : 1062.0609s for 90112 events => throughput is 8.48E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1118.7642s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3619s + [COUNTERS] Fortran MEs ( 1 ) : 1114.4022s for 90112 events => throughput is 8.09E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 227.5404s - [COUNTERS] Fortran Overhead ( 0 ) : 104.7327s - [COUNTERS] CudaCpp MEs ( 2 ) : 122.8076s for 8192 events => throughput is 6.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 125.7885s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5193s + [COUNTERS] CudaCpp MEs ( 2 ) : 125.0621s for 8192 events => throughput is 6.55E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2071s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1460.6766s - [COUNTERS] Fortran Overhead ( 0 ) : 108.1514s - [COUNTERS] CudaCpp MEs ( 2 ) : 1352.5253s for 90112 events => throughput is 6.66E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1322.8827s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3903s + [COUNTERS] CudaCpp MEs ( 2 ) : 1318.2870s for 90112 events => throughput is 6.84E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2054s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.953346e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.761597e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.887442e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.724704e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 113.0177s - [COUNTERS] Fortran Overhead ( 0 ) : 51.7429s - [COUNTERS] CudaCpp MEs ( 2 ) : 61.2748s for 8192 events => throughput is 1.34E+02 events/s + [COUNTERS] PROGRAM TOTAL : 62.4510s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5155s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.8333s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1022s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 722.6357s - [COUNTERS] Fortran Overhead ( 0 ) : 55.1190s - [COUNTERS] CudaCpp MEs ( 2 ) : 667.5167s for 90112 events => throughput is 1.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 684.8121s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4198s + [COUNTERS] CudaCpp MEs ( 2 ) : 680.2921s for 90112 events => throughput is 1.32E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.603521e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589042e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.594140e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.588931e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 49.4491s - [COUNTERS] Fortran Overhead ( 0 ) : 22.5625s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.8866s for 8192 events => throughput is 3.05E+02 events/s + [COUNTERS] PROGRAM TOTAL : 27.0092s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5181s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4459s for 8192 events => throughput is 3.10E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0452s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 323.2367s - [COUNTERS] Fortran Overhead ( 0 ) : 26.2106s - [COUNTERS] CudaCpp MEs ( 2 ) : 297.0260s for 90112 events => throughput is 3.03E+02 events/s + [COUNTERS] PROGRAM TOTAL : 298.0409s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4173s + [COUNTERS] CudaCpp MEs ( 2 ) : 293.5790s for 90112 events => throughput is 3.07E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0445s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.746482e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.648206e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.754234e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625373e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 43.5060s - [COUNTERS] Fortran Overhead ( 0 ) : 19.6610s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.8450s for 8192 events => throughput is 3.44E+02 events/s + [COUNTERS] PROGRAM TOTAL : 24.3540s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5168s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.7936s for 8192 events => throughput is 3.44E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 285.3812s - [COUNTERS] Fortran Overhead ( 0 ) : 23.3907s - [COUNTERS] CudaCpp MEs ( 2 ) : 261.9904s for 90112 events => throughput is 3.44E+02 events/s + [COUNTERS] PROGRAM TOTAL : 269.6777s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4164s + [COUNTERS] CudaCpp MEs ( 2 ) : 265.2234s for 90112 events => throughput is 3.40E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0378s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.296352e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.285493e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.310764e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.289545e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 45.6608s - [COUNTERS] Fortran Overhead ( 0 ) : 22.1706s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.4902s for 8192 events => throughput is 3.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.1227s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5145s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.5642s for 8192 events => throughput is 3.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0441s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 283.0596s - [COUNTERS] Fortran Overhead ( 0 ) : 25.7915s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.2681s for 90112 events => throughput is 3.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 274.1583s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4200s + [COUNTERS] CudaCpp MEs ( 2 ) : 269.6946s for 90112 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.790026e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.625912e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.795378e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.662510e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 3.5887s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8638s for 8192 events => throughput is 9.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7717s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0261s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8763s for 8192 events => throughput is 9.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8694s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 15.9655s - [COUNTERS] Fortran Overhead ( 0 ) : 6.4579s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5077s for 90112 events => throughput is 9.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.2659s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8943s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5013s for 90112 events => throughput is 9.48E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8704s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.454304e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.434661e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092578e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089765e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107884e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112116e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159186e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160890e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108717e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108390e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111609e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111312e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.112293e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109990e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.645128e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638783e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 5750f0dd36..b877c26fea 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - make USEBUILDDIR=1 BACKEND=cuda + + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y - make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:31:29 +DATE: 2024-08-08_20:58:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4791s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s - [COUNTERS] Fortran MEs ( 1 ) : 0.0728s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4051s + [COUNTERS] Fortran MEs ( 1 ) : 0.0703s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s - [COUNTERS] Fortran MEs ( 1 ) : 0.0748s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4153s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3445s + [COUNTERS] Fortran MEs ( 1 ) : 0.0708s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3443s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5396s - [COUNTERS] Fortran MEs ( 1 ) : 0.8047s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3303s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5573s + [COUNTERS] Fortran MEs ( 1 ) : 0.7730s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0780s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3418s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0764s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4884s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8684s for 90112 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5374s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8384s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.076463e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.104999e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.077689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080050e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351262541] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4183s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0429s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3875s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3450s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0419s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0553s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4741s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0024s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5394s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4624s for 90112 events => throughput is 1.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.913652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.937885e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.900215e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972484e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3673s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3427s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 8192 events => throughput is 3.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2748s for 90112 events => throughput is 3.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8108s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5445s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2657s for 90112 events => throughput is 3.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.312529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.384861e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.346229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.378583e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3800s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3684s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3456s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7983s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2454s for 90112 events => throughput is 3.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7798s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5417s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2375s for 90112 events => throughput is 3.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.396153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.465878e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.717666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.626688e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4112s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0348s for 8192 events => throughput is 2.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0061s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6154s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3906s for 90112 events => throughput is 2.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8986s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3549s for 90112 events => throughput is 2.54E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.408273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.412835e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.448995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.491870e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7838s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.14E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7705s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.03E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9715s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9630s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 90112 events => throughput is 1.06E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9737s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.482506e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555983e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.967685e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.037158e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.213762e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.629928e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.531197e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.566255e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.254372e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636845e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.843988e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.850724e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.244977e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.619360e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.788434e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.790736e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 19656e6368..8ac388b886 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,30 +1,30 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cppavx2 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:31:59 +DATE: 2024-08-08_20:58:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4662s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3929s - [COUNTERS] Fortran MEs ( 1 ) : 0.0732s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4756s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4044s + [COUNTERS] Fortran MEs ( 1 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4147s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3397s - [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4108s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s + [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3268s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5225s - [COUNTERS] Fortran MEs ( 1 ) : 0.8043s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3245s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5525s + [COUNTERS] Fortran MEs ( 1 ) : 0.7719s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110463158198617] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4778s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4049s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0729s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4137s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0712s for 8192 events => throughput is 1.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686347932190] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4140s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8051s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3233s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7851s for 90112 events => throughput is 1.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.138677e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154270e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.117776e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110459183868807] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3703s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510683073685827] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8423s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5500s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2923s for 90112 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8197s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2844s for 90112 events => throughput is 3.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.036160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998738e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.064103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.994620e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3719s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1451s for 90112 events => throughput is 6.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6873s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5442s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1427s for 90112 events => throughput is 6.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.245191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.110364e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.259060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.231132e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4340s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4218s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0122s for 8192 events => throughput is 6.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3551s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6834s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1366s for 90112 events => throughput is 6.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6706s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5390s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1312s for 90112 events => throughput is 6.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.674563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.737889e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.747510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.863785e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,38 +437,179 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110464220032526] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3592s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0167s for 8192 events => throughput is 4.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110464220032526) differ by less than 4E-4 (2.771292368253242e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510685471570221] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.7199s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5400s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1795s for 90112 events => throughput is 5.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685471570221) differ by less than 4E-4 (5.043963013928732e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.872478e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.938459e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110477321990667] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7679s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.31E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110477321990667) differ by less than 4E-4 (2.2880132283242816e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510689318513457] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.9690s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9617s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689318513457) differ by less than 4E-4 (1.2839907048700638e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.567743e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.424411e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.006580e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.460162e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.113271e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.506902e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.545880e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.393633e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** -Program received signal SIGFPE: Floating-point exception - erroneous arithmetic operation. - -Backtrace for this error: -#0 0x7f2ce7e23860 in ??? -#1 0x7f2ce7e22a05 in ??? -#2 0x7f2ce7a54def in ??? -#3 0x7f2ce84b810a in ??? -#4 0x7f2ce80f2575 in ??? -#5 0x7f2ce84b4c89 in ??? -#6 0x7f2ce84bebfd in ??? -#7 0x7f2ce84c4491 in ??? -#8 0x4300eb in ??? -#9 0x431c70 in ??? -#10 0x432da7 in ??? -#11 0x433b7e in ??? -#12 0x44a9c1 in ??? -#13 0x42ebdf in ??? -#14 0x40371e in ??? -#15 0x7f2ce7a3feaf in ??? -#16 0x7f2ce7a3ff5f in ??? -#17 0x403844 in ??? -#18 0xffffffffffffffff in ??? -./madX.sh: line 389: 827445 Floating point exception(core dumped) $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. - Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index ce6f992dd2..25661e1063 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512y - make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:32:16 +DATE: 2024-08-08_20:59:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4634s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3913s - [COUNTERS] Fortran MEs ( 1 ) : 0.0721s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4768s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4060s + [COUNTERS] Fortran MEs ( 1 ) : 0.0709s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4051s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3318s - [COUNTERS] Fortran MEs ( 1 ) : 0.0733s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4179s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3473s + [COUNTERS] Fortran MEs ( 1 ) : 0.0706s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3247s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5253s - [COUNTERS] Fortran MEs ( 1 ) : 0.7994s for 90112 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3258s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5517s + [COUNTERS] Fortran MEs ( 1 ) : 0.7741s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539350666329] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0776s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4207s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0763s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560103207] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6001s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8491s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3663s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5373s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8282s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.073902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091070e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.081313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.097593e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539350666335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3890s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560103204] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0520s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5829s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4690s for 90112 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9944s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5398s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4540s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.888912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922053e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.990970e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3734s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3492s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8273s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5566s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2707s for 90112 events => throughput is 3.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8003s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2622s for 90112 events => throughput is 3.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.409995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424784e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.381232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455227e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3680s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7902s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5474s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2428s for 90112 events => throughput is 3.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7822s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5448s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2369s for 90112 events => throughput is 3.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.780633e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.843024e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.863761e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.890496e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +437,6 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 @@ -438,9 +445,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4046s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3701s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 8192 events => throughput is 2.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0362s for 8192 events => throughput is 2.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9365s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3788s for 90112 events => throughput is 2.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9147s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5452s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3689s for 90112 events => throughput is 2.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.345816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.300565e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.395812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.415614e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7702s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.17E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7684s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.09E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0084s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0002s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 90112 events => throughput is 1.10E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9688s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9599s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.433145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.565914e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.010261e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.104681e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.473761e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636309e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.584931e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.555697e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.303866e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.642280e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.831028e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.824016e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.325113e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.612307e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.788376e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.778614e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 46804abf09..9204db3db0 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/h make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:37:41 +DATE: 2024-08-09_00:48:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.8908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8436s - [COUNTERS] Fortran MEs ( 1 ) : 0.0472s for 8192 events => throughput is 1.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9141s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8671s + [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3597s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4185s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3716s + [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7090s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1999s - [COUNTERS] Fortran MEs ( 1 ) : 0.5092s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7982s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2863s + [COUNTERS] Fortran MEs ( 1 ) : 0.5119s for 90112 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4491s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4002s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0489s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8072s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2708s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5364s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8165s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2690s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5470s for 90112 events => throughput is 1.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.706986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.683813e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.699867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.668738e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 8192 events => throughput is 3.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4071s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3797s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5505s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2580s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2925s for 90112 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5672s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2957s for 90112 events => throughput is 3.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.034167e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.037815e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.993777e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.993910e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 5.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4149s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2377s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1772s for 90112 events => throughput is 5.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4641s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2801s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1835s for 90112 events => throughput is 4.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.046299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.902798e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.149674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.886099e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.3953s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2347s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1605s for 90112 events => throughput is 5.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4216s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1645s for 90112 events => throughput is 5.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.314068e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.361206e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.459027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.494947e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4002s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5943s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3316s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2627s for 90112 events => throughput is 3.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5023s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2627s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2391s for 90112 events => throughput is 3.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.445922e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.615246e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.494379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.662708e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.7849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7843s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.40E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7949s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7934s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 2.2160s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2094s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7013s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6935s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.30E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.826997e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.844829e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.347084e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.285195e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.841221e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.255268e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.717455e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.760215e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.827200e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235451e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038168e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.038893e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.844247e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.241445e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.744588e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.725782e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index bbce3b7240..ae36851550 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cuda - +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:08 +DATE: 2024-08-09_00:49:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8605s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8922s + [COUNTERS] Fortran MEs ( 1 ) : 0.0473s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3500s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3728s + [COUNTERS] Fortran MEs ( 1 ) : 0.0475s for 8192 events => throughput is 1.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7057s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1971s - [COUNTERS] Fortran MEs ( 1 ) : 0.5086s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7988s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2854s + [COUNTERS] Fortran MEs ( 1 ) : 0.5133s for 90112 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162897355760356] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.4481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4028s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 8192 events => throughput is 1.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4180s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3713s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 2d49c9f52b..d90f539fcf 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:14 +DATE: 2024-08-09_00:49:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.8895s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8431s - [COUNTERS] Fortran MEs ( 1 ) : 0.0464s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8684s + [COUNTERS] Fortran MEs ( 1 ) : 0.0474s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3638s - [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4209s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3739s + [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7047s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1950s - [COUNTERS] Fortran MEs ( 1 ) : 0.5097s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8008s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2889s + [COUNTERS] Fortran MEs ( 1 ) : 0.5118s for 90112 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4005s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0482s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4229s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3736s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0488s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -168,9 +169,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8098s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5351s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8077s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5452s for 90112 events => throughput is 1.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -184,13 +186,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.605995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.584312e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.601314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.572139e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +216,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4000s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3717s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 8192 events => throughput is 2.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -248,9 +251,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5495s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2962s for 90112 events => throughput is 3.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6068s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3063s for 90112 events => throughput is 2.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -264,13 +268,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.929578e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.801476e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.739519e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +298,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4107s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3912s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0191s for 8192 events => throughput is 4.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -328,9 +333,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4322s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1815s for 90112 events => throughput is 4.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4541s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1842s for 90112 events => throughput is 4.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -344,13 +350,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.830829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.846731e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.788599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.806331e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +380,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3830s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -408,9 +415,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4028s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1654s for 90112 events => throughput is 5.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4306s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2629s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1673s for 90112 events => throughput is 5.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -424,13 +432,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.250961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.198253e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.207264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.334338e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +462,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4003s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4086s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3841s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 8192 events => throughput is 3.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -488,9 +497,10 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4957s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2454s for 90112 events => throughput is 3.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5232s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2514s for 90112 events => throughput is 3.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -504,13 +514,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.375382e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.405731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.300552e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -533,9 +543,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.7815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7809s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7989s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7974s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -566,9 +577,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6630s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6979s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6904s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -581,42 +593,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.830803e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.835154e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.306249e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.144694e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.830251e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.230105e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.737584e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.705062e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.843710e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235322e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038788e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.035545e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.827828e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.242431e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.738262e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.754474e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index cacd0f35d9..5562e4c07e 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:40:12 +DATE: 2024-08-09_00:52:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.6435s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3260s - [COUNTERS] Fortran MEs ( 1 ) : 2.3175s for 8192 events => throughput is 3.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5941s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3442s + [COUNTERS] Fortran MEs ( 1 ) : 2.2499s for 8192 events => throughput is 3.64E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3268s - [COUNTERS] Fortran MEs ( 1 ) : 2.2203s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3462s + [COUNTERS] Fortran MEs ( 1 ) : 2.2759s for 8192 events => throughput is 3.60E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.2382s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7627s - [COUNTERS] Fortran MEs ( 1 ) : 24.4756s for 90112 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.7017s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8086s + [COUNTERS] Fortran MEs ( 1 ) : 24.8931s for 90112 events => throughput is 3.62E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.0170s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6185s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3985s for 8192 events => throughput is 3.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4305s for 8192 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 30.4436s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0366s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.4070s for 90112 events => throughput is 3.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.5017s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7808s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.7158s for 90112 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.563022e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542884e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.568130e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.530103e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.8190s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5560s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2630s for 8192 events => throughput is 6.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3441s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2634s for 8192 events => throughput is 6.48E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.9508s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9389s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.0119s for 90112 events => throughput is 6.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.9197s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7936s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.1234s for 90112 events => throughput is 6.38E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.403640e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.656588e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.805258e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.664988e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4145s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5516s for 8192 events => throughput is 1.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9116s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3446s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5653s for 8192 events => throughput is 1.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.4794s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2982s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.1812s for 90112 events => throughput is 1.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.0033s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7755s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.2261s for 90112 events => throughput is 1.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.512239e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.485686e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.514846e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.488153e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3031s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8025s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5006s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8483s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3476s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4991s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.6774s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2371s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4403s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.2914s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7820s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5079s for 90112 events => throughput is 1.64E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.729653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.693554e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.736018e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.678028e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9519s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6302s for 8192 events => throughput is 1.30E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6411s for 8192 events => throughput is 1.28E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.3263s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3416s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.9846s for 90112 events => throughput is 1.29E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.8930s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7934s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.0976s for 90112 events => throughput is 1.27E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.291281e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269596e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.310298e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.304260e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8138s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8106s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0196s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.3691s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1886s for 90112 events => throughput is 4.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4031s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1951s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1884s for 90112 events => throughput is 4.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0195s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.833128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.836004e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.235176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.223426e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.111711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.196129e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.408923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.417377e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.159800e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.149870e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.418265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.416796e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.120081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.156718e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.756468e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.752894e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 92432a70ab..e6a1cba79b 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:42:44 +DATE: 2024-08-09_00:54:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.5425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3274s - [COUNTERS] Fortran MEs ( 1 ) : 2.2151s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6010s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s + [COUNTERS] Fortran MEs ( 1 ) : 2.2584s for 8192 events => throughput is 3.63E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3275s - [COUNTERS] Fortran MEs ( 1 ) : 2.2162s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6135s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s + [COUNTERS] Fortran MEs ( 1 ) : 2.2696s for 8192 events => throughput is 3.61E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.3201s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7716s - [COUNTERS] Fortran MEs ( 1 ) : 24.5485s for 90112 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.5878s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7922s + [COUNTERS] Fortran MEs ( 1 ) : 24.7956s for 90112 events => throughput is 3.63E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896784952157763E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 4.9557s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5931s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3626s for 8192 events => throughput is 3.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4000s for 8192 events => throughput is 3.41E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668138450782073E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 29.8768s - [COUNTERS] Fortran Overhead ( 0 ) : 3.9900s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.8868s for 90112 events => throughput is 3.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.1446s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7932s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3466s for 90112 events => throughput is 3.42E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.615815e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.577022e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.588760e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.590866e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7362s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0215s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7147s for 8192 events => throughput is 1.15E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6623s for 8192 events => throughput is 1.24E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.5915s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3718s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.2197s for 90112 events => throughput is 1.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.0575s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7825s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.2734s for 90112 events => throughput is 1.24E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.287471e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265218e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.287472e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.265996e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8858s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2817s for 8192 events => throughput is 2.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6296s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2826s for 8192 events => throughput is 2.90E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.1146s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9843s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1303s for 90112 events => throughput is 2.88E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9000s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7718s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.1273s for 90112 events => throughput is 2.88E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.952012e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.939784e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.978134e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.964350e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8292s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5726s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2566s for 8192 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6110s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2595s for 8192 events => throughput is 3.16E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.7825s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9553s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8272s for 90112 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.6623s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7820s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8794s for 90112 events => throughput is 3.13E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.330203e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.263231e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.291014e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.247254e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9527s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6385s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3142s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6684s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3212s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.5476s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0509s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4967s for 90112 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3279s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7717s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5549s for 90112 events => throughput is 2.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.605632e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.589261e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.608873e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.602723e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896802503195373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8048s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668190930428073E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.3385s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1681s for 90112 events => throughput is 5.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3814s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1945s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1700s for 90112 events => throughput is 5.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0169s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,37 +573,37 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.899244e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.860775e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.168943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.139558e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.330723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.304686e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.347926e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344126e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.329833e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.335964e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.345608e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.345203e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.313833e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.314317e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 7abb61d6c6..7e343e91b1 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,8 +13,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:44:48 +DATE: 2024-08-09_00:56:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.5379s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3249s - [COUNTERS] Fortran MEs ( 1 ) : 2.2131s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3434s + [COUNTERS] Fortran MEs ( 1 ) : 2.2435s for 8192 events => throughput is 3.65E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5505s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3304s - [COUNTERS] Fortran MEs ( 1 ) : 2.2201s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5935s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3429s + [COUNTERS] Fortran MEs ( 1 ) : 2.2507s for 8192 events => throughput is 3.64E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.2622s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7530s - [COUNTERS] Fortran MEs ( 1 ) : 24.5092s for 90112 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.4482s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7926s + [COUNTERS] Fortran MEs ( 1 ) : 24.6556s for 90112 events => throughput is 3.65E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.0598s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6439s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4159s for 8192 events => throughput is 3.39E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7899s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3466s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4385s for 8192 events => throughput is 3.36E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 30.6006s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0492s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5514s for 90112 events => throughput is 3.39E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.6799s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7926s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.8820s for 90112 events => throughput is 3.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.474454e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507267e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.439517e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.511786e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7397s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5249s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2148s for 8192 events => throughput is 6.74E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.5883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3421s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2436s for 8192 events => throughput is 6.59E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.3722s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9270s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.4452s for 90112 events => throughput is 6.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4498s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7701s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.6770s for 90112 events => throughput is 6.59E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.040827e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.943689e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.981328e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925887e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8620s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5465s for 8192 events => throughput is 1.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9098s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3504s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5576s for 8192 events => throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.3202s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2754s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.0449s for 90112 events => throughput is 1.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.9207s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7702s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1490s for 90112 events => throughput is 1.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.536921e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.518105e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.529532e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.514088e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.2826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4860s for 8192 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3445s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4873s for 8192 events => throughput is 1.68E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.5114s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1898s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.3216s for 90112 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1725s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7642s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4067s for 90112 events => throughput is 1.67E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.741888e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.710218e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.760590e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.722202e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5988s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9602s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6386s for 8192 events => throughput is 1.28E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6479s for 8192 events => throughput is 1.26E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.4068s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3459s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.0609s for 90112 events => throughput is 1.28E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.0659s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7892s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.2749s for 90112 events => throughput is 1.24E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302056e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.210214e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302590e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.254889e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7760s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 8192 events => throughput is 4.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0195s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.5070s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3171s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1899s for 90112 events => throughput is 4.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4045s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1952s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1896s for 90112 events => throughput is 4.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814747e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.215347e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.187533e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.080489e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.164029e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.380958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.389995e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.077544e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.128645e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.380568e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.372948e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.123241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.119403e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.746541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.750060e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 6e0ebf0fe6..0fe0851e40 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:06:30 +DATE: 2024-08-09_00:50:54 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6347s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6253s - [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6580s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6494s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s - [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3938s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3851s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.50E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3840s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2894s - [COUNTERS] Fortran MEs ( 1 ) : 0.0946s for 90112 events => throughput is 9.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4272s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3345s + [COUNTERS] Fortran MEs ( 1 ) : 0.0927s for 90112 events => throughput is 9.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3899s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3567s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0886s for 90112 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4271s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3353s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0914s for 90112 events => throughput is 9.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.005595e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.006217e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.012216e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.022578e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3708s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3903s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3856s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.89E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3147s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2675s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0472s for 90112 events => throughput is 1.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3937s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0489s for 90112 events => throughput is 1.84E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.909460e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.897485e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.971415e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.985824e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3921s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3888s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2978s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0299s for 90112 events => throughput is 3.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3531s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3221s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0306s for 90112 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.080442e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.126014e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.074278e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.364824e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0286s for 90112 events => throughput is 3.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3635s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3336s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0295s for 90112 events => throughput is 3.05E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.360999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.285096e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517113e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423598e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0304s for 90112 events => throughput is 2.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3563s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3235s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0323s for 90112 events => throughput is 2.79E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.892371e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.866364e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.222820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.134151e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869280] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8041s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8036s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8152s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384401] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7080s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7028s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.75E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7576s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 90112 events => throughput is 1.72E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.783715e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.730366e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.058188e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.967481e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.170829e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.198830e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.604074e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.649618e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.147861e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.170218e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.994548e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.903772e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.155905e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.201664e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.319425e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.319844e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 0993fdcc1c..5c4b04cd13 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:06:54 +DATE: 2024-08-09_00:51:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6621s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6530s - [COUNTERS] Fortran MEs ( 1 ) : 0.0091s for 8192 events => throughput is 9.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6497s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6414s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3966s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3872s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4039s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3951s + [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3966s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3008s - [COUNTERS] Fortran MEs ( 1 ) : 0.0958s for 90112 events => throughput is 9.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4878s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3911s + [COUNTERS] Fortran MEs ( 1 ) : 0.0967s for 90112 events => throughput is 9.32E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021439979276] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3975s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3887s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550550786874] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3681s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.4264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.015948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.034265e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.021671e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.024334e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021343761686] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3741s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3905s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3875s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550488814170] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3081s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 90112 events => throughput is 3.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3711s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0289s for 90112 events => throughput is 3.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.304389e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.288372e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.380080e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.432097e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3710s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.90E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3889s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.52E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2810s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2620s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0191s for 90112 events => throughput is 4.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3432s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3229s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 90112 events => throughput is 4.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.883752e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.077269e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.254694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.403997e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3739s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3722s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3848s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3493s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3299s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0195s for 90112 events => throughput is 4.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3387s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3197s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0187s for 90112 events => throughput is 4.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.263107e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.322495e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.596148e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.427973e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021917867366] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3804s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3878s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.78E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098551029624061] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2838s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 90112 events => throughput is 4.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3406s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 90112 events => throughput is 4.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.410618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.424607e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.579934e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.888963e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156022290359153] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8021s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8016s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.58E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8154s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.46E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098551341908548] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7042s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6994s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.90E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7407s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.004673e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.032627e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.352806e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.278657e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.848697e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.543019e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.574226e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.578539e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.876693e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.555176e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.655828e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.658200e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.468075e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.883073e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.699037e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.705532e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index f51812e183..62624c2c92 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,9 +1,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:07:18 +DATE: 2024-08-09_00:51:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6364s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6273s - [COUNTERS] Fortran MEs ( 1 ) : 0.0091s for 8192 events => throughput is 8.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6493s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6409s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.81E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3884s - [COUNTERS] Fortran MEs ( 1 ) : 0.0096s for 8192 events => throughput is 8.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3909s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4071s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3113s - [COUNTERS] Fortran MEs ( 1 ) : 0.0958s for 90112 events => throughput is 9.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4133s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3208s + [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3950s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3864s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3664s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4087s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 90112 events => throughput is 9.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.803105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.803386e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.833012e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.910254e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3782s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.86E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3923s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.82E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0466s for 90112 events => throughput is 1.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3653s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3175s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0474s for 90112 events => throughput is 1.90E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.966382e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964224e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.002002e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.028853e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3795s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.76E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3954s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3923s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3017s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 90112 events => throughput is 3.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3415s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3131s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.113986e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.237365e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.514629e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.416021e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3847s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3940s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3019s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3467s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0279s for 90112 events => throughput is 3.23E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.470466e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.347126e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.587854e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.589308e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3978s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 90112 events => throughput is 2.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3501s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3186s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 90112 events => throughput is 2.90E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.935388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.904623e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.012886e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.114835e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027194560187] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8059s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8054s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.56E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556243340819] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6952s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.78E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7501s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7444s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.75E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.612145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.842332e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.945612e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019027e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.146137e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.214756e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.463844e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.517612e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.146854e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.171297e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.862724e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.740991e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.162271e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.214875e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.282910e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.310258e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 679246dd46..6131633fdd 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,7 +1,7 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:40 +DATE: 2024-08-09_00:49:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7777s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7362s - [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8016s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7599s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3565s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4173s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s + [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6909s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2365s - [COUNTERS] Fortran MEs ( 1 ) : 0.4544s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6984s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2478s + [COUNTERS] Fortran MEs ( 1 ) : 0.4506s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419863] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4030s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4145s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0438s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7824s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3031s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7366s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2536s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4825s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.924806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880754e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.912421e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882930e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4066s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3713s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 8192 events => throughput is 3.37E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5413s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2663s for 90112 events => throughput is 3.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5199s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2711s for 90112 events => throughput is 3.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.314362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.302363e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.440069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.365112e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3723s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3924s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.28E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4296s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1637s for 90112 events => throughput is 5.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4183s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1675s for 90112 events => throughput is 5.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.062177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.278183e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.382563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.374748e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3894s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1505s for 90112 events => throughput is 5.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3978s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1520s for 90112 events => throughput is 5.93E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.763603e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.775498e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.871042e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.841522e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3996s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3821s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5325s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2948s + [COUNTERS] PROGRAM TOTAL : 1.4927s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2545s [COUNTERS] CudaCpp MEs ( 2 ) : 0.2377s for 90112 events => throughput is 3.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.502625e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.798876e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.556287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.612840e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8405s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.24E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6883s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6862s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.919891e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.869432e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.630666e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.714086e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.869333e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.311155e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082168e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083882e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898237e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.322734e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.164202e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.159310e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.905759e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.296675e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.090826e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.098537e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 9e00b5e78a..58b86df658 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:39:07 +DATE: 2024-08-09_00:50:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,8 +58,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7412s + [COUNTERS] PROGRAM TOTAL : 0.8051s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7635s [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3540s - [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4148s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3740s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6850s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2308s - [COUNTERS] Fortran MEs ( 1 ) : 0.4541s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7188s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2615s + [COUNTERS] Fortran MEs ( 1 ) : 0.4573s for 90112 events => throughput is 1.97E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598853620719339] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3996s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3751s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577522280119403] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7388s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4454s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7041s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4538s for 90112 events => throughput is 1.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.042800e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.004528e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015984e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989674e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598849697851406] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3933s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518590213366] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4390s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2600s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 90112 events => throughput is 5.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4571s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1866s for 90112 events => throughput is 4.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.765299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.766493e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.747967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.711541e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3932s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3838s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 8192 events => throughput is 8.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3503s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0942s for 90112 events => throughput is 9.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3456s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2495s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0958s for 90112 events => throughput is 9.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.065332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.204759e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.552613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.210555e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3855s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3419s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0890s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3394s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0908s for 90112 events => throughput is 9.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.815596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.706656e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.012011e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233766e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598854350242270] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3696s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3868s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577522751628507] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3921s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2698s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1223s for 90112 events => throughput is 7.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3825s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1256s for 90112 events => throughput is 7.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.916519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.942843e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.893526e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.910825e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598870301426373] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7885s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.59E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8091s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8078s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +557,10 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577527268256027] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6767s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 90112 events => throughput is 1.65E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7098s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.56E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.812781e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.705094e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.214634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.269887e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.018876e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.888199e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.400207e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391800e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.038902e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.898622e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.501990e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.539526e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.639781e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.473018e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.489136e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.495430e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index e096eb78b5..75d0c77429 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -6,8 +6,8 @@ make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -18,10 +18,10 @@ make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:39:32 +DATE: 2024-08-09_00:50:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7445s - [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7796s + [COUNTERS] Fortran MEs ( 1 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3977s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3560s - [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4160s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6901s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2335s - [COUNTERS] Fortran MEs ( 1 ) : 0.4566s for 90112 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7104s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2559s + [COUNTERS] Fortran MEs ( 1 ) : 0.4544s for 90112 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4017s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +167,10 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525144126803] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7736s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2953s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4783s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7448s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4867s for 90112 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.906547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.873127e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.885100e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907422e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +211,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +245,10 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525144126810] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5337s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2626s for 90112 events => throughput is 3.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5269s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2579s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2686s for 90112 events => throughput is 3.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.321004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333942e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.457355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.376975e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +289,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3969s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3926s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.37E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +323,10 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4292s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2669s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1623s for 90112 events => throughput is 5.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4173s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2508s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1662s for 90112 events => throughput is 5.42E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.432194e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.335642e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.311023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.330908e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +367,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3721s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3897s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3750s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +401,10 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1477s for 90112 events => throughput is 6.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4068s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2528s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1536s for 90112 events => throughput is 5.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.922477e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.855366e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.992472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.947430e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +445,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3995s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +479,10 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4944s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2251s for 90112 events => throughput is 4.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4943s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2580s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.827539e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.733262e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.759224e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.702855e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +523,10 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860056955807] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8053s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.21E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +558,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] Cross section = 44.58 [44.577523872560512] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) [COUNTERS] PROGRAM TOTAL : 1.6927s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6862s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +573,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.009355e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.871837e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.608692e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.622666e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.895025e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299743e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.063290e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055606e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.880064e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.302003e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.136844e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.140289e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.885288e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.319830e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.014417e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.983678e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***