diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index b8847b7cb6..3270f7fc47 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:24 +DATE: 2024-07-29_22:03:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7267s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7180s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7180s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7099s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1930s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1842s + [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.34E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2837s - [COUNTERS] Fortran MEs ( 1 ) : 0.0868s for 90112 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3931s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3008s + [COUNTERS] Fortran MEs ( 1 ) : 0.0923s for 90112 events => throughput is 9.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1843s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.18E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 90112 events => throughput is 1.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3651s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2894s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 90112 events => throughput is 1.19E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.162339e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.181726e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166702e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1799s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1755s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1743s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.88E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3337s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2882s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 90112 events => throughput is 1.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3345s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2886s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0459s for 90112 events => throughput is 1.96E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.930086e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940184e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984749e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.010256e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1789s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1821s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3219s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 90112 events => throughput is 2.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3247s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2903s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0344s for 90112 events => throughput is 2.62E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539184e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.500310e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.709927e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.685692e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1781s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.81E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3264s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2932s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 90112 events => throughput is 2.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2890s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 90112 events => throughput is 2.72E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.635946e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.690097e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.766552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.829998e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1731s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0038s for 8192 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1752s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3421s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3007s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 90112 events => throughput is 2.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3320s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0399s for 90112 events => throughput is 2.26E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.124836e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.163616e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.174846e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.222879e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6131s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6126s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.49E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6002s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7351s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.79E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7158s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7110s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.749639e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428189e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.937376e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934676e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.647566e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.272618e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.462558e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.410420e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.641748e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.252201e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.002002e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.033305e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.659137e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.249117e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.136180e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.140359e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 587bb76d73..422001bad8 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -13,16 +13,16 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:40 +DATE: 2024-07-29_22:03:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7166s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6898s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6816s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1787s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1795s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1712s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s - [COUNTERS] Fortran MEs ( 1 ) : 0.0933s for 90112 events => throughput is 9.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2866s + [COUNTERS] Fortran MEs ( 1 ) : 0.0889s for 90112 events => throughput is 1.01E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1981s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1830s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.25E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3643s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 90112 events => throughput is 1.24E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3634s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0724s for 90112 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.200646e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.233232e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.237867e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.213402e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1785s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2910s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0302s for 90112 events => throughput is 2.98E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.187376e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.061654e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.220665e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.316531e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.62E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1890s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1864s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.22E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2872s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 90112 events => throughput is 3.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3297s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3026s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.33E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.488004e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.476693e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.612737e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.603758e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1879s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1855s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.36E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2873s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 90112 events => throughput is 3.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3176s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 90112 events => throughput is 3.56E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.702442e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.442326e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.804759e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.734795e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1768s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1743s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1756s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.64E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3213s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2952s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 90112 events => throughput is 3.45E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3206s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2940s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0265s for 90112 events => throughput is 3.40E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.323941e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.289339e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761942e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.727530e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5917s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5912s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6050s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6046s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.71E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.92E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7063s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.96E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122217e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.796169e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.609665e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.725216e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.582586e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.760033e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.902910e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880186e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.573732e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.757117e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.085324e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.091088e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.048658e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.038305e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.737473e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.792009e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index f580fe1044..768f7e8724 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:15:57 +DATE: 2024-07-29_22:04:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6825s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6742s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6794s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.44E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1782s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1700s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,8 +108,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2832s + [COUNTERS] PROGRAM TOTAL : 0.3723s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2847s [COUNTERS] Fortran MEs ( 1 ) : 0.0875s for 90112 events => throughput is 1.03E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.18E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3744s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0787s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3707s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0783s for 90112 events => throughput is 1.15E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.113565e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.144347e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.142626e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.167529e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1728s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1783s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1742s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3372s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2926s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0446s for 90112 events => throughput is 2.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3361s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 90112 events => throughput is 2.04E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964919e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.980256e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.051752e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.115480e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,8 +285,8 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1771s + [COUNTERS] PROGRAM TOTAL : 0.1777s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1744s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.46E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3444s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0371s for 90112 events => throughput is 2.43E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0340s for 90112 events => throughput is 2.65E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.534941e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.572398e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.720296e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.680963e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1827s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1782s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1750s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3391s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0359s for 90112 events => throughput is 2.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3190s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2856s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0333s for 90112 events => throughput is 2.70E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.692433e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.614195e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.750694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.771113e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1794s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1760s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.35E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3483s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3066s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0416s for 90112 events => throughput is 2.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3318s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0399s for 90112 events => throughput is 2.26E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.177381e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.257287e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.322707e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5936s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5931s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5966s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5961s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.65E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7142s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.81E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.89E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.699826e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.321548e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.973881e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935499e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.642059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.284410e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.500425e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.498784e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.034916e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.255199e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.050339e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.059621e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.807602e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.256949e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156854e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.180804e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index ccecc02825..69bbbacd47 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -2,17 +2,17 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:16:14 +DATE: 2024-07-29_22:04:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8426s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7989s - [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7457s + [COUNTERS] Fortran MEs ( 1 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3952s - [COUNTERS] Fortran MEs ( 1 ) : 0.0441s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4056s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3651s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.8386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3588s - [COUNTERS] Fortran MEs ( 1 ) : 0.4798s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2762s + [COUNTERS] Fortran MEs ( 1 ) : 0.4538s for 90112 events => throughput is 1.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7529s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7076s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0452s for 8192 events => throughput is 1.81E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.9929s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4771s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5158s for 90112 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7878s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4750s for 90112 events => throughput is 1.90E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.819871e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.891474e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.838165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.883124e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4133s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4084s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 8192 events => throughput is 3.27E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6401s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2894s for 90112 events => throughput is 3.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5600s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2910s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2690s for 90112 events => throughput is 3.35E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.138027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.264695e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.282175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.431241e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4390s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4228s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0162s for 8192 events => throughput is 5.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3882s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3732s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.46E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5071s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3263s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1808s for 90112 events => throughput is 4.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4483s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2836s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1648s for 90112 events => throughput is 5.47E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.258519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.290568e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.342811e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.310768e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4034s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3888s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3862s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.92E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1623s for 90112 events => throughput is 5.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5127s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1621s for 90112 events => throughput is 5.56E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.840330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.330624e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.907730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.474213e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3956s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4063s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.90E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4701s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2380s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2321s for 90112 events => throughput is 3.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5248s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2920s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2328s for 90112 events => throughput is 3.87E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.585624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.586309e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.722456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.663130e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8009s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8003s + [COUNTERS] PROGRAM TOTAL : 0.7886s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7880s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7129s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7060s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.31E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6977s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.40E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.914318e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.179164e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.613535e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.666655e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.870641e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.348517e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.085338e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080520e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.804787e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318776e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159663e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.164696e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.702330e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.329059e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.067580e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.021399e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index f46b75eef7..684354b55b 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -4,10 +4,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -20,9 +20,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:16:41 +DATE: 2024-07-29_22:04:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7783s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7371s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7457s + [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,8 +83,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s + [COUNTERS] PROGRAM TOTAL : 0.4012s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3605s [COUNTERS] Fortran MEs ( 1 ) : 0.0407s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6995s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2512s - [COUNTERS] Fortran MEs ( 1 ) : 0.4483s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2745s + [COUNTERS] Fortran MEs ( 1 ) : 0.4559s for 90112 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094179692708323] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4396s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3988s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0409s for 8192 events => throughput is 2.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688388783328] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6901s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4430s for 90112 events => throughput is 2.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7597s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3118s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4478s for 90112 events => throughput is 2.01E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.030986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982303e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.985412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.017583e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094175707109216] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 8192 events => throughput is 4.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3747s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 5.01E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684583433771] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4062s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2257s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1804s for 90112 events => throughput is 4.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5006s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1858s for 90112 events => throughput is 4.85E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.724911e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.710209e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.762208e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.864172e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3760s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.61E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3134s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2174s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0960s for 90112 events => throughput is 9.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3721s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2760s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0961s for 90112 events => throughput is 9.38E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.106025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.360726e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.235160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.250583e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3777s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3697s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2161s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 90112 events => throughput is 9.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3802s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2885s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0917s for 90112 events => throughput is 9.83E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.917882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.745345e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.709952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004745e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094178448427996] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3747s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3637s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0110s for 8192 events => throughput is 7.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3966s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3841s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.54E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105688391432061] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3429s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1229s for 90112 events => throughput is 7.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4045s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1234s for 90112 events => throughput is 7.30E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.804453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.765853e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.929498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.085857e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184162782994] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7863s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7920s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7915s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.71E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105694501043516] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6604s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6548s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 90112 events => throughput is 1.61E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7068s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7013s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 90112 events => throughput is 1.63E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.908595e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.230004e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.266019e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.279837e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.049207e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.084229e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.416476e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.403533e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.062460e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.055257e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.551978e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.515583e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.413712e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.601159e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.519394e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.439244e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index dc1bcf4827..7b9c054b27 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -3,20 +3,20 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:17:06 +DATE: 2024-07-29_22:05:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7372s - [COUNTERS] Fortran MEs ( 1 ) : 0.0407s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7402s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3534s - [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3582s + [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7153s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2615s - [COUNTERS] Fortran MEs ( 1 ) : 0.4538s for 90112 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7182s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2684s + [COUNTERS] Fortran MEs ( 1 ) : 0.4498s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4374s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3938s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4464s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4028s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0435s for 8192 events => throughput is 1.88E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4777s for 90112 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8075s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4890s for 90112 events => throughput is 1.84E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.883629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.889376e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.895894e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845573e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4011s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4071s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3826s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 8192 events => throughput is 3.34E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4880s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2602s for 90112 events => throughput is 3.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5623s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2930s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2693s for 90112 events => throughput is 3.35E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.317423e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.367227e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.373113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409905e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3732s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0145s for 8192 events => throughput is 5.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.18E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3817s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2190s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1627s for 90112 events => throughput is 5.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4540s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2884s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1656s for 90112 events => throughput is 5.44E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.391740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.334360e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.381923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.364322e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3899s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3756s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.73E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3634s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2153s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1482s for 90112 events => throughput is 6.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4298s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2806s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1492s for 90112 events => throughput is 6.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.907882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.969072e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.901678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.988815e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3839s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.69E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4596s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2305s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2291s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5202s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2910s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2292s for 90112 events => throughput is 3.93E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.830144e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.579230e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.799392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.853829e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7816s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.47E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7906s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7900s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.44E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6477s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6414s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7146s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7082s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.41E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.955290e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.169026e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.610728e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.568130e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.721513e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.343207e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.057949e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.063650e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.718962e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308294e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127927e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.140371e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.720504e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340894e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.981768e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.017924e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index daad34ef63..aa5dfdb760 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:17:32 +DATE: 2024-07-29_22:05:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6619s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3441s - [COUNTERS] Fortran MEs ( 1 ) : 0.3178s for 8192 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3530s + [COUNTERS] Fortran MEs ( 1 ) : 0.3304s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6512s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s - [COUNTERS] Fortran MEs ( 1 ) : 0.3303s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6450s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3226s + [COUNTERS] Fortran MEs ( 1 ) : 0.3224s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.0566s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4770s - [COUNTERS] Fortran MEs ( 1 ) : 3.5796s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0354s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5076s + [COUNTERS] Fortran MEs ( 1 ) : 3.5278s for 90112 events => throughput is 2.55E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9736s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6407s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3329s for 8192 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.4802s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8183s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6619s for 90112 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.4744s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8120s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6624s for 90112 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.542070e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.422032e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.548855e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.526747e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1750s for 8192 events => throughput is 4.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6652s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4904s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1748s for 8192 events => throughput is 4.69E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.5748s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6476s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9272s for 90112 events => throughput is 4.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.6172s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6820s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9352s for 90112 events => throughput is 4.66E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.742087e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.762899e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.785084e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.850747e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4868s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0864s for 8192 events => throughput is 9.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4906s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0866s for 8192 events => throughput is 9.46E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5034s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9448s for 90112 events => throughput is 9.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5661s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5973s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9688s for 90112 events => throughput is 9.30E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.300762e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.516172e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.558512e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.613300e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0769s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4722s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3935s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0787s for 8192 events => throughput is 1.04E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.4089s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5590s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8498s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4195s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5651s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8544s for 90112 events => throughput is 1.05E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.073418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069321e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.076553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.082990e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5274s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4198s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1076s for 8192 events => throughput is 7.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5359s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4264s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1095s for 8192 events => throughput is 7.48E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8167s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6114s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2053s for 90112 events => throughput is 7.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7884s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6000s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1884s for 90112 events => throughput is 7.58E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.600195e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.450112e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.737653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.544666e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7530s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7609s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7556s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.53E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9540s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9311s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9456s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9229s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.96E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632787e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.656639e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.212630e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.071461e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415553e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002534e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.240897e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.240708e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.415697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005309e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.251024e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.250788e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.408414e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002363e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.766536e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745901e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 51c84bcce7..171495282f 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:18:15 +DATE: 2024-07-29_22:06:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s - [COUNTERS] Fortran MEs ( 1 ) : 0.3203s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6716s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3519s + [COUNTERS] Fortran MEs ( 1 ) : 0.3196s for 8192 events => throughput is 2.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6540s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3233s - [COUNTERS] Fortran MEs ( 1 ) : 0.3307s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6505s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3229s + [COUNTERS] Fortran MEs ( 1 ) : 0.3276s for 8192 events => throughput is 2.50E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.0535s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4792s - [COUNTERS] Fortran MEs ( 1 ) : 3.5744s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0666s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5224s + [COUNTERS] Fortran MEs ( 1 ) : 3.5442s for 90112 events => throughput is 2.54E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112722616246457] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9529s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6280s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3249s for 8192 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9530s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6334s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3196s for 8192 events => throughput is 2.56E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238468293717765E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.7771s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9289s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.8482s for 90112 events => throughput is 2.34E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.4107s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8319s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5788s for 90112 events => throughput is 2.52E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.295265e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.511357e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.274937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.604573e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112720694019242] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5768s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1114s for 8192 events => throughput is 7.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5149s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0973s for 8192 events => throughput is 8.42E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238454783817719E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.9013s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7217s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1796s for 90112 events => throughput is 7.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6632s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5798s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0834s for 90112 events => throughput is 8.32E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.824875e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.461718e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.970843e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.435078e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0476s for 8192 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4132s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.2132s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6840s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5293s for 90112 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0142s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5207s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4935s for 90112 events => throughput is 1.83E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.559003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845070e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.455805e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845365e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0477s for 8192 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4005s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.1232s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4802s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9727s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5263s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4464s for 90112 events => throughput is 2.02E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.558817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.988979e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.780926e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019310e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112723389095883] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4670s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4037s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0633s for 8192 events => throughput is 1.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4214s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3693s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0521s for 8192 events => throughput is 1.57E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238464413054557E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.3110s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6631s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6480s for 90112 events => throughput is 1.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1269s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5450s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5820s for 90112 events => throughput is 1.55E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.404960e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.528950e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.473914e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555841e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112725654777677] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7573s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7564s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 8.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7554s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7545s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.62E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238470908598507E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9333s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9227s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9311s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9208s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 90112 events => throughput is 8.76E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.129185e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.194333e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.549775e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.548073e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539449e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.566751e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.726001e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688797e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.546076e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.578996e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761871e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.806743e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.365545e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.448586e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.281294e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.290804e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index b3a8db7192..b8f1a78b55 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - - - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:18:54 +DATE: 2024-07-29_22:07:05 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6896s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3548s - [COUNTERS] Fortran MEs ( 1 ) : 0.3348s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6758s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3538s + [COUNTERS] Fortran MEs ( 1 ) : 0.3220s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6450s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3225s - [COUNTERS] Fortran MEs ( 1 ) : 0.3225s for 8192 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6458s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3230s + [COUNTERS] Fortran MEs ( 1 ) : 0.3229s for 8192 events => throughput is 2.54E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.1061s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5000s - [COUNTERS] Fortran MEs ( 1 ) : 3.6061s for 90112 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0651s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5166s + [COUNTERS] Fortran MEs ( 1 ) : 3.5484s for 90112 events => throughput is 2.54E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6448s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3444s for 8192 events => throughput is 2.38E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9945s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6537s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3409s for 8192 events => throughput is 2.40E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.6402s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8430s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7972s for 90112 events => throughput is 2.37E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.5836s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8258s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7578s for 90112 events => throughput is 2.40E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.504502e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.486744e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.493736e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.470946e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1730s for 8192 events => throughput is 4.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6636s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4897s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1740s for 8192 events => throughput is 4.71E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.5611s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6531s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9081s for 90112 events => throughput is 4.72E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.5829s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6697s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9132s for 90112 events => throughput is 4.71E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.885906e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.826413e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.454856e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.810991e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5422s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0965s for 8192 events => throughput is 8.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4893s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0873s for 8192 events => throughput is 9.38E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8283s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7674s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0609s for 90112 events => throughput is 8.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5450s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5843s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9607s for 90112 events => throughput is 9.38E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.150394e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.676024e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.706600e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.541755e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5467s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0932s for 8192 events => throughput is 8.79E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3974s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0769s for 8192 events => throughput is 1.07E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5615s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6701s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8914s for 90112 events => throughput is 1.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4347s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8572s for 90112 events => throughput is 1.05E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.036553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.075357e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.098297e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4623s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1226s for 8192 events => throughput is 6.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5383s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4268s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1115s for 8192 events => throughput is 7.35E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.8307s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6135s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2171s for 90112 events => throughput is 7.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.8221s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6050s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2172s for 90112 events => throughput is 7.40E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.036325e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.884568e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.017098e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.228306e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7527s + [COUNTERS] PROGRAM TOTAL : 0.7514s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7459s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9432s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9204s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9342s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9117s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0225s for 90112 events => throughput is 4.00E+06 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628045e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.618143e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.092999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.561179e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.285885e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.983430e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.235467e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235256e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.248772e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.973212e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.246161e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.245564e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.266042e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000778e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.742147e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.721545e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index a3214916d8..81f52746be 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:19:38 +DATE: 2024-07-29_22:07:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3806s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2416s - [COUNTERS] Fortran MEs ( 1 ) : 4.1390s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4228s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2481s + [COUNTERS] Fortran MEs ( 1 ) : 4.1747s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4016s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2413s - [COUNTERS] Fortran MEs ( 1 ) : 4.1603s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4370s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2464s + [COUNTERS] Fortran MEs ( 1 ) : 4.1906s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.5196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7600s - [COUNTERS] Fortran MEs ( 1 ) : 45.7596s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 48.0368s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8082s + [COUNTERS] Fortran MEs ( 1 ) : 46.2286s for 90112 events => throughput is 1.95E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.8038s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4979s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3060s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.7367s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4293s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3074s for 8192 events => throughput is 1.90E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 53.4475s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9535s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.4939s for 90112 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.5931s + [COUNTERS] Fortran Overhead ( 0 ) : 5.9868s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.6062s for 90112 events => throughput is 1.89E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922216e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.954096e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.959395e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953080e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.7615s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4839s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2777s for 8192 events => throughput is 3.60E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.8895s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5222s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3673s for 8192 events => throughput is 3.46E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099785] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 30.6075s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0936s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5139s for 90112 events => throughput is 3.40E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.4741s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0413s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.4328s for 90112 events => throughput is 3.54E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.496612e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.666565e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.539346e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638265e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.2183s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9937s for 8192 events => throughput is 8.24E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2201s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2224s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9977s for 8192 events => throughput is 8.21E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 14.0676s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7785s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.2891s for 90112 events => throughput is 7.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.8857s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7517s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.1340s for 90112 events => throughput is 8.09E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.419384e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.061983e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.678823e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.242742e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.9878s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1062s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8816s for 8192 events => throughput is 9.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0014s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1024s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8990s for 8192 events => throughput is 9.11E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 12.3962s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6208s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7754s for 90112 events => throughput is 9.22E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.4952s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6498s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.8455s for 90112 events => throughput is 9.15E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.529916e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.575097e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.344439e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.555042e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.4488s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3506s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0982s for 8192 events => throughput is 7.46E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4500s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3432s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1067s for 8192 events => throughput is 7.40E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 15.0103s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8885s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.1218s for 90112 events => throughput is 7.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.1395s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8838s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.2557s for 90112 events => throughput is 7.35E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.477284e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.462614e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.503375e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.433750e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222225] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7764s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0326s for 8192 events => throughput is 2.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7813s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099782] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.6460s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3642s for 90112 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6264s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2624s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3640s for 90112 events => throughput is 2.48E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.283334e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.276536e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.506901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518906e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.117711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.123901e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183664e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.183279e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.114395e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.133650e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.181079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.165040e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.111113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.132084e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.444874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.449380e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 565fe287ce..816c8b8c0f 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -14,9 +14,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:23:54 +DATE: 2024-07-29_22:12:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2420s - [COUNTERS] Fortran MEs ( 1 ) : 4.1560s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2491s + [COUNTERS] Fortran MEs ( 1 ) : 4.1903s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4132s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2372s - [COUNTERS] Fortran MEs ( 1 ) : 4.1760s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4402s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2452s + [COUNTERS] Fortran MEs ( 1 ) : 4.1950s for 8192 events => throughput is 1.95E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.3387s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7498s - [COUNTERS] Fortran MEs ( 1 ) : 45.5889s for 90112 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.8807s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8029s + [COUNTERS] Fortran MEs ( 1 ) : 46.0778s for 90112 events => throughput is 1.96E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320716615478996] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.4803s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3064s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1739s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.5471s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3488s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1983s for 8192 events => throughput is 1.95E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558162567940870] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 52.0102s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8677s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.1425s for 90112 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.4200s + [COUNTERS] Fortran Overhead ( 0 ) : 5.8805s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.5396s for 90112 events => throughput is 1.94E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015479e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.002799e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.013117e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.979262e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320708851010073] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.5680s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4066s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1614s for 8192 events => throughput is 7.05E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5242s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3688s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1554s for 8192 events => throughput is 7.09E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558157380141428] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 15.5194s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8862s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.6332s for 90112 events => throughput is 7.13E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.6398s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8978s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7420s for 90112 events => throughput is 7.07E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.320236e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.295445e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.204448e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.284825e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.2285s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7290s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4995s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2459s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5037s for 8192 events => throughput is 1.63E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 7.7643s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2713s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4931s for 90112 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.8765s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2685s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.6079s for 90112 events => throughput is 1.61E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.674256e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.651416e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.684230e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.666476e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.1706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6998s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4708s for 8192 events => throughput is 1.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1279s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4457s for 8192 events => throughput is 1.84E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 7.1094s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2054s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9040s for 90112 events => throughput is 1.84E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1659s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2068s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9591s for 90112 events => throughput is 1.82E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.876397e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.801807e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.833562e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.756964e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320713685871445] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.4404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8351s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6053s for 8192 events => throughput is 1.35E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3597s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8096s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5501s for 8192 events => throughput is 1.49E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558162184774774] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 8.3122s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3041s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.0081s for 90112 events => throughput is 1.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.4664s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3279s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1385s for 90112 events => throughput is 1.47E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.515129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.482943e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.524953e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.523551e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320719394836651] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7253s + [COUNTERS] PROGRAM TOTAL : 0.7500s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7272s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.60E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558167135091578] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.4757s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2273s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2484s for 90112 events => throughput is 3.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5023s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2544s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2479s for 90112 events => throughput is 3.64E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.375249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.381473e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.741898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.729153e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.119355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.092252e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.307435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.224049e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.151709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.173049e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.303009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.296643e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.049512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.083017e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.396866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.399519e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 93675b1fbf..9f3676b1f2 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:27:14 +DATE: 2024-07-29_22:15:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.3736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2409s - [COUNTERS] Fortran MEs ( 1 ) : 4.1327s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2517s + [COUNTERS] Fortran MEs ( 1 ) : 4.1809s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.3735s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2382s - [COUNTERS] Fortran MEs ( 1 ) : 4.1353s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4140s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2435s + [COUNTERS] Fortran MEs ( 1 ) : 4.1705s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 47.4052s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7410s - [COUNTERS] Fortran MEs ( 1 ) : 45.6641s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.8004s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8059s + [COUNTERS] Fortran MEs ( 1 ) : 45.9945s for 90112 events => throughput is 1.96E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556893412546] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 8.8075s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4671s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3405s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.9100s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5128s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3972s for 8192 events => throughput is 1.86E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083370546855] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 53.8850s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9787s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.9063s for 90112 events => throughput is 1.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 54.3693s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0368s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.3325s for 90112 events => throughput is 1.86E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.948359e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919187e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.951349e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.932595e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556780656974] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.7620s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4646s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2974s for 8192 events => throughput is 3.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7966s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4921s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3045s for 8192 events => throughput is 3.55E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083390630859] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 29.1849s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0005s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.1844s for 90112 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.5742s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0357s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.5384s for 90112 events => throughput is 3.53E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.700037e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.657977e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.594428e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.650164e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.2008s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9900s for 8192 events => throughput is 8.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2215s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2206s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0009s for 8192 events => throughput is 8.18E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 13.6123s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7227s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.8896s for 90112 events => throughput is 8.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.7319s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7573s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.9746s for 90112 events => throughput is 8.21E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.547846e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.410907e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.522651e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.459754e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.9517s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0869s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8648s for 8192 events => throughput is 9.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9675s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0941s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8733s for 8192 events => throughput is 9.38E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 12.1428s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5992s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5436s for 90112 events => throughput is 9.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.2725s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6290s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.6435s for 90112 events => throughput is 9.34E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.818655e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.689603e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.752589e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.356798e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.4530s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3428s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1102s for 8192 events => throughput is 7.38E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4604s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3492s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1112s for 8192 events => throughput is 7.37E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 16.4311s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0180s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.4132s for 90112 events => throughput is 6.72E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4583s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9095s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.5488s for 90112 events => throughput is 7.18E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.834157e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.390056e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943776e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.445415e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.4632 [0.46320556665261842] fbridge_mode=1 [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7919s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7589s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7781s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7452s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0328s for 8192 events => throughput is 2.50E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2256 [0.22558083224243403] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.7451s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3819s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3632s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6291s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2676s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3616s for 90112 events => throughput is 2.49E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.281497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.284193e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.508846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.515603e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.120326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.120168e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.174974e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.108754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.127662e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168348e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.121179e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.125210e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.453826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.452380e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index a3ac7b5c1f..fa641934b6 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -4,18 +4,18 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make USEBUILDDIR=1 BACKEND=cpp512y - -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:32:46 +DATE: 2024-07-29_22:22:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.2642s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5189s - [COUNTERS] Fortran MEs ( 1 ) : 100.7453s for 8192 events => throughput is 8.13E+01 events/s + [COUNTERS] PROGRAM TOTAL : 99.5492s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4900s + [COUNTERS] Fortran MEs ( 1 ) : 99.0592s for 8192 events => throughput is 8.27E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 98.9549s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5002s - [COUNTERS] Fortran MEs ( 1 ) : 98.4548s for 8192 events => throughput is 8.32E+01 events/s + [COUNTERS] PROGRAM TOTAL : 99.4008s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4945s + [COUNTERS] Fortran MEs ( 1 ) : 98.9064s for 8192 events => throughput is 8.28E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1078.9193s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3729s - [COUNTERS] Fortran MEs ( 1 ) : 1074.5464s for 90112 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1094.7388s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2979s + [COUNTERS] Fortran MEs ( 1 ) : 1090.4409s for 90112 events => throughput is 8.26E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 226.9636s - [COUNTERS] Fortran Overhead ( 0 ) : 104.1252s - [COUNTERS] CudaCpp MEs ( 2 ) : 122.8384s for 8192 events => throughput is 6.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 227.2365s + [COUNTERS] Fortran Overhead ( 0 ) : 104.3500s + [COUNTERS] CudaCpp MEs ( 2 ) : 122.8865s for 8192 events => throughput is 6.67E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1453.2216s - [COUNTERS] Fortran Overhead ( 0 ) : 108.8256s - [COUNTERS] CudaCpp MEs ( 2 ) : 1344.3960s for 90112 events => throughput is 6.70E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1445.8173s + [COUNTERS] Fortran Overhead ( 0 ) : 106.5425s + [COUNTERS] CudaCpp MEs ( 2 ) : 1339.2748s for 90112 events => throughput is 6.73E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.957500e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.920069e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.966466e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.943436e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 110.0381s - [COUNTERS] Fortran Overhead ( 0 ) : 50.3711s - [COUNTERS] CudaCpp MEs ( 2 ) : 59.6670s for 8192 events => throughput is 1.37E+02 events/s + [COUNTERS] PROGRAM TOTAL : 111.6193s + [COUNTERS] Fortran Overhead ( 0 ) : 51.6728s + [COUNTERS] CudaCpp MEs ( 2 ) : 59.9465s for 8192 events => throughput is 1.37E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 713.5237s - [COUNTERS] Fortran Overhead ( 0 ) : 54.3392s - [COUNTERS] CudaCpp MEs ( 2 ) : 659.1845s for 90112 events => throughput is 1.37E+02 events/s + [COUNTERS] PROGRAM TOTAL : 725.2020s + [COUNTERS] Fortran Overhead ( 0 ) : 56.1630s + [COUNTERS] CudaCpp MEs ( 2 ) : 669.0390s for 90112 events => throughput is 1.35E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.640648e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.591679e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.642221e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.603504e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 51.6707s - [COUNTERS] Fortran Overhead ( 0 ) : 23.8142s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.8565s for 8192 events => throughput is 2.94E+02 events/s + [COUNTERS] PROGRAM TOTAL : 51.5843s + [COUNTERS] Fortran Overhead ( 0 ) : 23.8720s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7124s for 8192 events => throughput is 2.96E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 333.1028s - [COUNTERS] Fortran Overhead ( 0 ) : 27.4583s - [COUNTERS] CudaCpp MEs ( 2 ) : 305.6444s for 90112 events => throughput is 2.95E+02 events/s + [COUNTERS] PROGRAM TOTAL : 332.1264s + [COUNTERS] Fortran Overhead ( 0 ) : 27.7568s + [COUNTERS] CudaCpp MEs ( 2 ) : 304.3697s for 90112 events => throughput is 2.96E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.542437e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.542507e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.558618e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.514809e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 45.2154s - [COUNTERS] Fortran Overhead ( 0 ) : 20.7378s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4776s for 8192 events => throughput is 3.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 46.4882s + [COUNTERS] Fortran Overhead ( 0 ) : 20.8457s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.6425s for 8192 events => throughput is 3.19E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 293.9503s - [COUNTERS] Fortran Overhead ( 0 ) : 24.5760s - [COUNTERS] CudaCpp MEs ( 2 ) : 269.3742s for 90112 events => throughput is 3.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 309.9712s + [COUNTERS] Fortran Overhead ( 0 ) : 24.8419s + [COUNTERS] CudaCpp MEs ( 2 ) : 285.1293s for 90112 events => throughput is 3.16E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.038194e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.033721e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.054728e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.057809e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 46.6506s - [COUNTERS] Fortran Overhead ( 0 ) : 22.6220s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.0285s for 8192 events => throughput is 3.41E+02 events/s + [COUNTERS] PROGRAM TOTAL : 47.4714s + [COUNTERS] Fortran Overhead ( 0 ) : 23.0716s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.3999s for 8192 events => throughput is 3.36E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 291.3320s - [COUNTERS] Fortran Overhead ( 0 ) : 26.5806s - [COUNTERS] CudaCpp MEs ( 2 ) : 264.7513s for 90112 events => throughput is 3.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 294.0148s + [COUNTERS] Fortran Overhead ( 0 ) : 26.7464s + [COUNTERS] CudaCpp MEs ( 2 ) : 267.2684s for 90112 events => throughput is 3.37E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.666698e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684705e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.681846e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.658848e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 4.2374s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1562s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0812s for 8192 events => throughput is 7.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.2550s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1693s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0857s for 8192 events => throughput is 7.55E+03 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 18.7608s - [COUNTERS] Fortran Overhead ( 0 ) : 6.8530s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9078s for 90112 events => throughput is 7.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 18.9067s + [COUNTERS] Fortran Overhead ( 0 ) : 6.9948s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9119s for 90112 events => throughput is 7.56E+03 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.502460e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.519149e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.309158e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.350437e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.222869e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.259221e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.577320e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.568218e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.238055e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.241387e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.437122e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.459484e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.224755e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.239820e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.231636e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235198e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index a539e33f24..d8abfedcca 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_23:01:56 +DATE: 2024-07-29_23:52:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 97.1861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4946s - [COUNTERS] Fortran MEs ( 1 ) : 96.6914s for 8192 events => throughput is 8.47E+01 events/s + [COUNTERS] PROGRAM TOTAL : 99.6079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4881s + [COUNTERS] Fortran MEs ( 1 ) : 99.1199s for 8192 events => throughput is 8.26E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.7329s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4973s - [COUNTERS] Fortran MEs ( 1 ) : 96.2355s for 8192 events => throughput is 8.51E+01 events/s + [COUNTERS] PROGRAM TOTAL : 99.5199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4908s + [COUNTERS] Fortran MEs ( 1 ) : 99.0292s for 8192 events => throughput is 8.27E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1066.1378s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3607s - [COUNTERS] Fortran MEs ( 1 ) : 1061.7771s for 90112 events => throughput is 8.49E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1095.2545s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2977s + [COUNTERS] Fortran MEs ( 1 ) : 1090.9568s for 90112 events => throughput is 8.26E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719945779552E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 204.5576s - [COUNTERS] Fortran Overhead ( 0 ) : 94.7222s - [COUNTERS] CudaCpp MEs ( 2 ) : 109.8354s for 8192 events => throughput is 7.46E+01 events/s + [COUNTERS] PROGRAM TOTAL : 200.9109s + [COUNTERS] Fortran Overhead ( 0 ) : 92.8174s + [COUNTERS] CudaCpp MEs ( 2 ) : 108.0935s for 8192 events => throughput is 7.58E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -168,9 +168,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326290777570335E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1302.1844s - [COUNTERS] Fortran Overhead ( 0 ) : 96.3064s - [COUNTERS] CudaCpp MEs ( 2 ) : 1205.8781s for 90112 events => throughput is 7.47E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1289.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 96.9871s + [COUNTERS] CudaCpp MEs ( 2 ) : 1192.3195s for 90112 events => throughput is 7.56E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,12 +183,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.892403e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.863211e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.896902e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.875036e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,9 +212,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716994349971E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 50.4740s - [COUNTERS] Fortran Overhead ( 0 ) : 23.8797s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5944s for 8192 events => throughput is 3.08E+02 events/s + [COUNTERS] PROGRAM TOTAL : 50.5689s + [COUNTERS] Fortran Overhead ( 0 ) : 23.8610s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.7079s for 8192 events => throughput is 3.07E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -246,9 +246,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326284885505778E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 322.1801s - [COUNTERS] Fortran Overhead ( 0 ) : 27.6823s - [COUNTERS] CudaCpp MEs ( 2 ) : 294.4978s for 90112 events => throughput is 3.06E+02 events/s + [COUNTERS] PROGRAM TOTAL : 324.8239s + [COUNTERS] Fortran Overhead ( 0 ) : 28.0664s + [COUNTERS] CudaCpp MEs ( 2 ) : 296.7575s for 90112 events => throughput is 3.04E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -261,12 +261,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518666e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.490813e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.484203e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508951e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -290,9 +290,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 26.5623s - [COUNTERS] Fortran Overhead ( 0 ) : 12.2724s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.2899s for 8192 events => throughput is 5.73E+02 events/s + [COUNTERS] PROGRAM TOTAL : 26.1432s + [COUNTERS] Fortran Overhead ( 0 ) : 12.1714s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.9718s for 8192 events => throughput is 5.86E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -324,9 +324,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 174.5012s - [COUNTERS] Fortran Overhead ( 0 ) : 16.5646s - [COUNTERS] CudaCpp MEs ( 2 ) : 157.9366s for 90112 events => throughput is 5.71E+02 events/s + [COUNTERS] PROGRAM TOTAL : 170.4055s + [COUNTERS] Fortran Overhead ( 0 ) : 16.0172s + [COUNTERS] CudaCpp MEs ( 2 ) : 154.3883s for 90112 events => throughput is 5.84E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -339,12 +339,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.701727e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.128487e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.718879e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.020900e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -368,9 +368,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 23.9391s - [COUNTERS] Fortran Overhead ( 0 ) : 11.1292s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8099s for 8192 events => throughput is 6.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 22.9330s + [COUNTERS] Fortran Overhead ( 0 ) : 10.6520s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.2810s for 8192 events => throughput is 6.67E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -402,9 +402,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 155.8533s - [COUNTERS] Fortran Overhead ( 0 ) : 14.9625s - [COUNTERS] CudaCpp MEs ( 2 ) : 140.8908s for 90112 events => throughput is 6.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 148.2361s + [COUNTERS] Fortran Overhead ( 0 ) : 14.5301s + [COUNTERS] CudaCpp MEs ( 2 ) : 133.7059s for 90112 events => throughput is 6.74E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -417,12 +417,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.716591e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.016436e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.738216e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.120246e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -446,9 +446,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405719257109645E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.2840s - [COUNTERS] Fortran Overhead ( 0 ) : 12.4664s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8176s for 8192 events => throughput is 6.39E+02 events/s + [COUNTERS] PROGRAM TOTAL : 23.7347s + [COUNTERS] Fortran Overhead ( 0 ) : 11.7114s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0233s for 8192 events => throughput is 6.81E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -480,9 +480,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326283665697276E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 149.3053s - [COUNTERS] Fortran Overhead ( 0 ) : 16.1390s - [COUNTERS] CudaCpp MEs ( 2 ) : 133.1664s for 90112 events => throughput is 6.77E+02 events/s + [COUNTERS] PROGRAM TOTAL : 147.8089s + [COUNTERS] Fortran Overhead ( 0 ) : 15.4600s + [COUNTERS] CudaCpp MEs ( 2 ) : 132.3489s for 90112 events => throughput is 6.81E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -495,12 +495,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.340832e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.347422e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.256949e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.299669e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -523,9 +523,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.241e-06 [1.2405721007137020E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.5997s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0687s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5310s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6202s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0975s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5226s for 8192 events => throughput is 1.57E+04 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -556,9 +556,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326295421688232E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 11.4660s - [COUNTERS] Fortran Overhead ( 0 ) : 5.7555s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7106s for 90112 events => throughput is 1.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 11.7194s + [COUNTERS] Fortran Overhead ( 0 ) : 5.8635s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8559s for 90112 events => throughput is 1.54E+04 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -571,42 +571,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.530700e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.521949e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.545413e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.541623e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.163666e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.154317e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.148478e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.147541e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.113035e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.154346e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.205309e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.146796e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.129848e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.145319e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.016707e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.054414e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 78332de82a..825cc315f2 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -24,15 +24,15 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-06-29_00:09:11 +DATE: 2024-07-30_00:59:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.7022s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4938s - [COUNTERS] Fortran MEs ( 1 ) : 96.2084s for 8192 events => throughput is 8.51E+01 events/s + [COUNTERS] PROGRAM TOTAL : 99.6445s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4883s + [COUNTERS] Fortran MEs ( 1 ) : 99.1562s for 8192 events => throughput is 8.26E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 96.6669s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5001s - [COUNTERS] Fortran MEs ( 1 ) : 96.1668s for 8192 events => throughput is 8.52E+01 events/s + [COUNTERS] PROGRAM TOTAL : 100.0106s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4950s + [COUNTERS] Fortran MEs ( 1 ) : 99.5156s for 8192 events => throughput is 8.23E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1066.3936s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3326s - [COUNTERS] Fortran MEs ( 1 ) : 1062.0609s for 90112 events => throughput is 8.48E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1096.9067s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2925s + [COUNTERS] Fortran MEs ( 1 ) : 1092.6143s for 90112 events => throughput is 8.25E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 227.5404s - [COUNTERS] Fortran Overhead ( 0 ) : 104.7327s - [COUNTERS] CudaCpp MEs ( 2 ) : 122.8076s for 8192 events => throughput is 6.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 224.1823s + [COUNTERS] Fortran Overhead ( 0 ) : 102.8988s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.2836s for 8192 events => throughput is 6.75E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1460.6766s - [COUNTERS] Fortran Overhead ( 0 ) : 108.1514s - [COUNTERS] CudaCpp MEs ( 2 ) : 1352.5253s for 90112 events => throughput is 6.66E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1434.6211s + [COUNTERS] Fortran Overhead ( 0 ) : 108.8918s + [COUNTERS] CudaCpp MEs ( 2 ) : 1325.7292s for 90112 events => throughput is 6.80E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.953346e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.917281e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.887442e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.942829e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 113.0177s - [COUNTERS] Fortran Overhead ( 0 ) : 51.7429s - [COUNTERS] CudaCpp MEs ( 2 ) : 61.2748s for 8192 events => throughput is 1.34E+02 events/s + [COUNTERS] PROGRAM TOTAL : 112.9645s + [COUNTERS] Fortran Overhead ( 0 ) : 52.0561s + [COUNTERS] CudaCpp MEs ( 2 ) : 60.9084s for 8192 events => throughput is 1.34E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 722.6357s - [COUNTERS] Fortran Overhead ( 0 ) : 55.1190s - [COUNTERS] CudaCpp MEs ( 2 ) : 667.5167s for 90112 events => throughput is 1.35E+02 events/s + [COUNTERS] PROGRAM TOTAL : 721.0610s + [COUNTERS] Fortran Overhead ( 0 ) : 55.6215s + [COUNTERS] CudaCpp MEs ( 2 ) : 665.4395s for 90112 events => throughput is 1.35E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.603521e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.597061e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.594140e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.601504e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 49.4491s - [COUNTERS] Fortran Overhead ( 0 ) : 22.5625s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.8866s for 8192 events => throughput is 3.05E+02 events/s + [COUNTERS] PROGRAM TOTAL : 48.9771s + [COUNTERS] Fortran Overhead ( 0 ) : 22.6219s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3552s for 8192 events => throughput is 3.11E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 323.2367s - [COUNTERS] Fortran Overhead ( 0 ) : 26.2106s - [COUNTERS] CudaCpp MEs ( 2 ) : 297.0260s for 90112 events => throughput is 3.03E+02 events/s + [COUNTERS] PROGRAM TOTAL : 318.1437s + [COUNTERS] Fortran Overhead ( 0 ) : 26.4966s + [COUNTERS] CudaCpp MEs ( 2 ) : 291.6471s for 90112 events => throughput is 3.09E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.746482e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.668785e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.754234e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.711895e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 43.5060s - [COUNTERS] Fortran Overhead ( 0 ) : 19.6610s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.8450s for 8192 events => throughput is 3.44E+02 events/s + [COUNTERS] PROGRAM TOTAL : 43.4638s + [COUNTERS] Fortran Overhead ( 0 ) : 19.8200s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.6438s for 8192 events => throughput is 3.46E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 285.3812s - [COUNTERS] Fortran Overhead ( 0 ) : 23.3907s - [COUNTERS] CudaCpp MEs ( 2 ) : 261.9904s for 90112 events => throughput is 3.44E+02 events/s + [COUNTERS] PROGRAM TOTAL : 286.7614s + [COUNTERS] Fortran Overhead ( 0 ) : 23.7091s + [COUNTERS] CudaCpp MEs ( 2 ) : 263.0522s for 90112 events => throughput is 3.43E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.296352e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.223302e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.310764e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.222178e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 45.6608s - [COUNTERS] Fortran Overhead ( 0 ) : 22.1706s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.4902s for 8192 events => throughput is 3.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 46.5166s + [COUNTERS] Fortran Overhead ( 0 ) : 22.6065s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.9101s for 8192 events => throughput is 3.43E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 283.0596s - [COUNTERS] Fortran Overhead ( 0 ) : 25.7915s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.2681s for 90112 events => throughput is 3.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 287.3514s + [COUNTERS] Fortran Overhead ( 0 ) : 26.3051s + [COUNTERS] CudaCpp MEs ( 2 ) : 261.0463s for 90112 events => throughput is 3.45E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.790026e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.744938e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.795378e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.754247e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 3.5887s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8638s for 8192 events => throughput is 9.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.5910s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7301s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8608s for 8192 events => throughput is 9.52E+03 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 15.9655s - [COUNTERS] Fortran Overhead ( 0 ) : 6.4579s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5077s for 90112 events => throughput is 9.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.0413s + [COUNTERS] Fortran Overhead ( 0 ) : 6.5592s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4820s for 90112 events => throughput is 9.50E+03 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.454304e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.445560e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092578e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.084168e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107884e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110947e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159186e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.158375e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108717e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107283e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111609e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113934e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.112293e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106945e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.645128e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.646374e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 5750f0dd36..7002220724 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -4,19 +4,19 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y - make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:31:29 +DATE: 2024-07-29_22:19:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4791s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s - [COUNTERS] Fortran MEs ( 1 ) : 0.0728s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4568s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s + [COUNTERS] Fortran MEs ( 1 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s - [COUNTERS] Fortran MEs ( 1 ) : 0.0748s for 8192 events => throughput is 1.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3954s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3248s + [COUNTERS] Fortran MEs ( 1 ) : 0.0706s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3443s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5396s - [COUNTERS] Fortran MEs ( 1 ) : 0.8047s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2816s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5068s + [COUNTERS] Fortran MEs ( 1 ) : 0.7749s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0780s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4717s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4884s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8684s for 90112 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3882s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8205s for 90112 events => throughput is 1.10E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.076463e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.102092e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.077689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.118458e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351262541] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4183s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0429s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4071s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3657s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0553s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4741s for 90112 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0038s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5449s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4589s for 90112 events => throughput is 1.96E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.913652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.954953e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.900215e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.982079e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3572s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 8192 events => throughput is 3.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3703s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3462s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 8192 events => throughput is 3.40E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2748s for 90112 events => throughput is 3.28E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7770s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5138s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2632s for 90112 events => throughput is 3.42E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.312529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.315807e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.346229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.429662e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3800s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3671s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3459s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0212s for 8192 events => throughput is 3.87E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7983s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2454s for 90112 events => throughput is 3.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8106s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2421s for 90112 events => throughput is 3.72E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.396153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.594900e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.717666e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.679693e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4112s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0348s for 8192 events => throughput is 2.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4088s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3746s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 8192 events => throughput is 2.40E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0061s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6154s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3906s for 90112 events => throughput is 2.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8860s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5338s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3522s for 90112 events => throughput is 2.56E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.408273e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.368610e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.448995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.402581e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7838s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.14E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7535s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7528s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.24E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9715s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9630s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 90112 events => throughput is 1.06E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9689s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9610s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 90112 events => throughput is 1.14E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.482506e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.648602e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.967685e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.004536e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.213762e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.641077e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.531197e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.527664e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.254372e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.616584e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.843988e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.826733e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.244977e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.630239e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.788434e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.791497e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 19656e6368..d23ffc55bb 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,19 +1,19 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone + + make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cppavx2 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512y - make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:31:59 +DATE: 2024-07-29_22:20:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4662s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3929s - [COUNTERS] Fortran MEs ( 1 ) : 0.0732s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s + [COUNTERS] Fortran MEs ( 1 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4147s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3397s - [COUNTERS] Fortran MEs ( 1 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3955s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3250s + [COUNTERS] Fortran MEs ( 1 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3268s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5225s - [COUNTERS] Fortran MEs ( 1 ) : 0.8043s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2866s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5101s + [COUNTERS] Fortran MEs ( 1 ) : 0.7765s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110463158198617] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4778s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4049s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0729s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4667s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3964s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686347932190] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4140s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8051s for 90112 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3505s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7826s for 90112 events => throughput is 1.15E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.138677e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.148659e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154490e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110459183868807] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3728s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3467s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 8192 events => throughput is 3.13E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510683073685827] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8423s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5500s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2923s for 90112 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8063s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5213s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2850s for 90112 events => throughput is 3.16E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.036160e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.020436e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.064103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.974895e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3719s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3540s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3413s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.47E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1451s for 90112 events => throughput is 6.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6447s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1398s for 90112 events => throughput is 6.44E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.245191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.341578e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.259060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.274078e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4340s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4218s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0122s for 8192 events => throughput is 6.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3372s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.12E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6834s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1366s for 90112 events => throughput is 6.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6618s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5301s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1317s for 90112 events => throughput is 6.84E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.674563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.545784e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.747510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.811410e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,38 +429,175 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110464220032526] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.3578s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3416s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0161s for 8192 events => throughput is 5.07E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110464220032526) differ by less than 4E-4 (2.771292368253242e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510685471570221] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.6913s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5146s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1767s for 90112 events => throughput is 5.10E+05 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685471570221) differ by less than 4E-4 (5.043963013928732e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.881734e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.818725e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2711 [0.27110477321990667] fbridge_mode=1 + [UNWEIGHT] Wrote 404 events (found 1228 events) + [COUNTERS] PROGRAM TOTAL : 0.7536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7531s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.53E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.27110539351263330) and cuda (0.27110477321990667) differ by less than 4E-4 (2.2880132283242816e-06) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2151 [0.21510689318513457] fbridge_mode=1 + [UNWEIGHT] Wrote 1939 events (found 1944 events) + [COUNTERS] PROGRAM TOTAL : 1.9309s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9248s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 90112 events => throughput is 1.48E+07 events/s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689318513457) differ by less than 4E-4 (1.2839907048700638e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.724373e+07 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.489292e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.097238e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.448583e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.051723e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.494580e+08 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.568237e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.342643e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** -Program received signal SIGFPE: Floating-point exception - erroneous arithmetic operation. - -Backtrace for this error: -#0 0x7f2ce7e23860 in ??? -#1 0x7f2ce7e22a05 in ??? -#2 0x7f2ce7a54def in ??? -#3 0x7f2ce84b810a in ??? -#4 0x7f2ce80f2575 in ??? -#5 0x7f2ce84b4c89 in ??? -#6 0x7f2ce84bebfd in ??? -#7 0x7f2ce84c4491 in ??? -#8 0x4300eb in ??? -#9 0x431c70 in ??? -#10 0x432da7 in ??? -#11 0x433b7e in ??? -#12 0x44a9c1 in ??? -#13 0x42ebdf in ??? -#14 0x40371e in ??? -#15 0x7f2ce7a3feaf in ??? -#16 0x7f2ce7a3ff5f in ??? -#17 0x403844 in ??? -#18 0xffffffffffffffff in ??? -./madX.sh: line 389: 827445 Floating point exception(core dumped) $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. - Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index ce6f992dd2..978d6d0053 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - -make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-28_21:32:16 +DATE: 2024-07-29_22:20:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4634s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3913s - [COUNTERS] Fortran MEs ( 1 ) : 0.0721s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4578s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3865s + [COUNTERS] Fortran MEs ( 1 ) : 0.0713s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4051s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3318s - [COUNTERS] Fortran MEs ( 1 ) : 0.0733s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3968s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3259s + [COUNTERS] Fortran MEs ( 1 ) : 0.0709s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3247s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5253s - [COUNTERS] Fortran MEs ( 1 ) : 0.7994s for 90112 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2867s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5103s + [COUNTERS] Fortran MEs ( 1 ) : 0.7764s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539350666329] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0776s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4733s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3994s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0739s for 8192 events => throughput is 1.11E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560103207] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6001s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8491s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4005s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5745s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8260s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.073902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083382e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.081313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113799e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539350666335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4074s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3664s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686560103204] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0520s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5829s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4690s for 90112 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9803s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5321s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4483s for 90112 events => throughput is 2.01E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.888912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975641e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.956054e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3714s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3470s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8273s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5566s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2707s for 90112 events => throughput is 3.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7810s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5210s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2600s for 90112 events => throughput is 3.47E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.409995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.414370e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.381232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.398277e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3660s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3453s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.96E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7902s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5474s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2428s for 90112 events => throughput is 3.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7426s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2297s for 90112 events => throughput is 3.92E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.780633e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.794101e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.863761e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.858299e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -429,7 +429,6 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 @@ -438,9 +437,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4046s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3701s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 8192 events => throughput is 2.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4007s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9365s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3788s for 90112 events => throughput is 2.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8989s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5334s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3655s for 90112 events => throughput is 2.47E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.345816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.311036e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.395812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.439663e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7702s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.17E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7678s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7672s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0084s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0002s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 90112 events => throughput is 1.10E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9701s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9624s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 90112 events => throughput is 1.16E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.433145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.629001e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.010261e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.074446e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.473761e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.622509e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.584931e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.545358e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.303866e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.635746e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.831028e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.803183e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.325113e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.620011e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.788376e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.784168e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 46804abf09..e7dc87a9c7 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/h make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:37:41 +DATE: 2024-07-30_02:28:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.8908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8436s - [COUNTERS] Fortran MEs ( 1 ) : 0.0472s for 8192 events => throughput is 1.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8454s + [COUNTERS] Fortran MEs ( 1 ) : 0.0494s for 8192 events => throughput is 1.66E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3597s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4049s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3572s + [COUNTERS] Fortran MEs ( 1 ) : 0.0477s for 8192 events => throughput is 1.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7090s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1999s - [COUNTERS] Fortran MEs ( 1 ) : 0.5092s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7552s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2388s + [COUNTERS] Fortran MEs ( 1 ) : 0.5164s for 90112 events => throughput is 1.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4491s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4002s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0489s for 8192 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4240s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0492s for 8192 events => throughput is 1.67E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8072s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2708s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5364s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8529s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5444s for 90112 events => throughput is 1.66E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.706986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.687871e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.699867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.680530e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 8192 events => throughput is 3.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4143s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3875s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 8192 events => throughput is 3.05E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5505s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2580s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2925s for 90112 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5774s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2836s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2938s for 90112 events => throughput is 3.07E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.034167e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.975387e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.993777e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.051865e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3768s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0161s for 8192 events => throughput is 5.10E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4149s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2377s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1772s for 90112 events => throughput is 5.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4513s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1789s for 90112 events => throughput is 5.04E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.046299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.909868e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.149674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.944178e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3726s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.44E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.3953s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2347s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1605s for 90112 events => throughput is 5.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4366s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1608s for 90112 events => throughput is 5.60E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.314068e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.395063e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.459027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.423190e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4002s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4048s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3827s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.71E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5943s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3316s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2627s for 90112 events => throughput is 3.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5178s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2754s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2424s for 90112 events => throughput is 3.72E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.445922e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.637222e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.494379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.694605e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.7849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7843s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.40E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7889s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7883s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 2.2160s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2094s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6884s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.826997e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.997381e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.347084e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.374799e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.841221e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.260229e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.717455e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.725948e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.827200e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.253431e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038168e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.047210e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.844247e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.249593e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.744588e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.739596e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index bbce3b7240..b3757068d3 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,7 +1,7 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:08 +DATE: 2024-07-30_02:28:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8605s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8968s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8495s + [COUNTERS] Fortran MEs ( 1 ) : 0.0473s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3500s - [COUNTERS] Fortran MEs ( 1 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4044s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s + [COUNTERS] Fortran MEs ( 1 ) : 0.0473s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7057s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1971s - [COUNTERS] Fortran MEs ( 1 ) : 0.5086s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7557s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2411s + [COUNTERS] Fortran MEs ( 1 ) : 0.5146s for 90112 events => throughput is 1.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162897355760356] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.4481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4028s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 8192 events => throughput is 1.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4520s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0457s for 8192 events => throughput is 1.79E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 2d49c9f52b..76c6ba1c06 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,8 +1,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:14 +DATE: 2024-07-30_02:28:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.8895s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8431s - [COUNTERS] Fortran MEs ( 1 ) : 0.0464s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0530s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0031s + [COUNTERS] Fortran MEs ( 1 ) : 0.0499s for 8192 events => throughput is 1.64E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3638s - [COUNTERS] Fortran MEs ( 1 ) : 0.0497s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4058s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3591s + [COUNTERS] Fortran MEs ( 1 ) : 0.0467s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7047s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1950s - [COUNTERS] Fortran MEs ( 1 ) : 0.5097s for 90112 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7705s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2526s + [COUNTERS] Fortran MEs ( 1 ) : 0.5179s for 90112 events => throughput is 1.74E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4005s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0482s for 8192 events => throughput is 1.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4226s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -168,9 +168,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8098s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5351s for 90112 events => throughput is 1.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9239s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3602s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5636s for 90112 events => throughput is 1.60E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -184,13 +184,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.605995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.594154e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.601314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.592186e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4125s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.07E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -248,9 +248,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5495s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2962s for 90112 events => throughput is 3.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6033s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3011s for 90112 events => throughput is 2.99E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -264,13 +264,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.929578e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.837110e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.884378e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3897s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3735s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0163s for 8192 events => throughput is 5.04E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -328,9 +328,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4322s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1815s for 90112 events => throughput is 4.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4550s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2732s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1818s for 90112 events => throughput is 4.96E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -344,13 +344,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.830829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.714732e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.788599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.873382e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3830s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3924s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0151s for 8192 events => throughput is 5.41E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -408,9 +408,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4028s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1654s for 90112 events => throughput is 5.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4587s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2903s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1684s for 90112 events => throughput is 5.35E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -424,13 +424,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.250961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.350300e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.207264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.221808e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4003s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4023s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3795s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -488,9 +488,9 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4957s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2454s for 90112 events => throughput is 3.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5330s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2824s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2507s for 90112 events => throughput is 3.59E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -504,13 +504,13 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318226e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.405731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.261182e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -533,9 +533,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.7815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7809s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7869s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7864s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.42E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -566,8 +566,8 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggb [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6630s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6565s + [COUNTERS] PROGRAM TOTAL : 1.6921s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6856s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -581,42 +581,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.830803e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.949525e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.306249e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.287242e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.830251e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.234637e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.737584e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.699485e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.843710e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.254917e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038788e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.040778e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.827828e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.258122e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.738262e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.727500e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_d_inl0_hrd0.txt index ba2026b842..829955e173 100644 --- a/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/p make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-26_17:56:51 +DATE: 2024-07-29_22:21:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943535] fbridge_mode=0 [UNWEIGHT] Wrote 2424 events (found 5627 events) - [COUNTERS] PROGRAM TOTAL : 0.7702s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7611s - [COUNTERS] Fortran MEs ( 1 ) : 0.0091s for 8192 events => throughput is 8.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7579s + [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.77E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943535] fbridge_mode=0 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3796s - [COUNTERS] Fortran MEs ( 1 ) : 0.0098s for 8192 events => throughput is 8.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3796s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3700s + [COUNTERS] Fortran MEs ( 1 ) : 0.0096s for 8192 events => throughput is 8.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208815] fbridge_mode=0 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6652s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5449s - [COUNTERS] Fortran MEs ( 1 ) : 0.1203s for 106496 events => throughput is 8.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6647s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5445s + [COUNTERS] Fortran MEs ( 1 ) : 0.1202s for 106496 events => throughput is 8.86E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943549] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3927s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3829s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 8192 events => throughput is 8.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3868s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 8192 events => throughput is 8.24E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208815] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.7108s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1310s for 106496 events => throughput is 8.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7164s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5839s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1325s for 106496 events => throughput is 8.04E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.913703e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.919358e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.148615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.000580e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943542] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3836s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3747s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.20E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208815] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6858s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5936s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0922s for 106496 events => throughput is 1.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6728s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5821s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0908s for 106496 events => throughput is 1.17E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.079163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109952e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127213e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.118575e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943535] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3838s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3771s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 8192 events => throughput is 1.48E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208801] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6842s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6113s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0729s for 106496 events => throughput is 1.46E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6393s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5696s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0697s for 106496 events => throughput is 1.53E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.469819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.453506e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.495929e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.523595e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943535] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3785s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3735s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 8192 events => throughput is 1.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3787s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3742s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.80E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208801] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6316s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0625s for 106496 events => throughput is 1.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6419s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0620s for 106496 events => throughput is 1.72E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.649526e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.638429e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.736382e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.705187e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943535] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3964s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3877s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 8192 events => throughput is 9.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3862s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 8192 events => throughput is 9.15E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208815] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.7372s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1172s for 106496 events => throughput is 9.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6927s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1115s for 106496 events => throughput is 9.55E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.910776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.102092e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.040851e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.026670e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_pptt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943542] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8152s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8146s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.49E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8117s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.59E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_pptt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208801] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 2.0145s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 106496 events => throughput is 1.66E+07 events/s + [COUNTERS] PROGRAM TOTAL : 2.0108s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0047s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0061s for 106496 events => throughput is 1.75E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.520740e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.174456e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.337600e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.334811e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.446494e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.142591e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.348228e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.348253e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.461630e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.113593e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.426123e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.445115e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.454528e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.128031e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.343511e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.345028e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_f_inl0_hrd0.txt index 4a75e73993..e2771afd07 100644 --- a/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_f_inl0_hrd0.txt @@ -4,19 +4,19 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/p make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-26_17:57:19 +DATE: 2024-07-29_22:21:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943535] fbridge_mode=0 [UNWEIGHT] Wrote 2424 events (found 5627 events) - [COUNTERS] PROGRAM TOTAL : 0.7734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7641s - [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7723s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7623s + [COUNTERS] Fortran MEs ( 1 ) : 0.0101s for 8192 events => throughput is 8.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -85,7 +85,7 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x1_fortran > /tmp/aval [UNWEIGHT] Wrote 1620 events (found 1622 events) [COUNTERS] PROGRAM TOTAL : 0.3789s [COUNTERS] Fortran Overhead ( 0 ) : 0.3694s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.71E+05 events/s + [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.60E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208815] fbridge_mode=0 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6584s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5375s - [COUNTERS] Fortran MEs ( 1 ) : 0.1209s for 106496 events => throughput is 8.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6577s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5367s + [COUNTERS] Fortran MEs ( 1 ) : 0.1210s for 106496 events => throughput is 8.80E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839366313757246] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3879s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 8192 events => throughput is 8.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3906s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 8192 events => throughput is 8.98E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743157628329] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.7230s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6027s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1203s for 106496 events => throughput is 8.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7009s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5819s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1190s for 106496 events => throughput is 8.95E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.562121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.790726e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.728814e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.757476e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839359302378490] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3885s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 8192 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3814s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332735570664781] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6626s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5872s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0753s for 106496 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6488s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 106496 events => throughput is 1.43E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.327418e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.324937e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.321149e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.342224e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839377375607924] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.86E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3769s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332735678901514] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6265s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5884s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0381s for 106496 events => throughput is 2.80E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6101s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5727s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0374s for 106496 events => throughput is 2.85E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.822671e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.819636e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.931120e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.968926e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839377375607924] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3724s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3699s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3723s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.24E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332735678901514] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6300s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0357s for 106496 events => throughput is 2.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6105s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5754s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0351s for 106496 events => throughput is 3.04E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.081662e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.019263e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.158349e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.197173e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839383232393040] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3839s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3801s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0038s for 8192 events => throughput is 2.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3738s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.93E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,8 +470,8 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332741873476778] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6398s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5865s + [COUNTERS] PROGRAM TOTAL : 1.6267s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5736s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0532s for 106496 events => throughput is 2.00E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.934025e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.965688e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.039534e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.042783e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_pptt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839390592146394] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8044s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8039s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.65E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7980s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7975s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.70E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_pptt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332748339525949] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 2.0254s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0196s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 106496 events => throughput is 1.84E+07 events/s + [COUNTERS] PROGRAM TOTAL : 2.0087s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 106496 events => throughput is 1.91E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.803334e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.625165e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.426397e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.412392e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.380757e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.651332e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.514810e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.456610e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.414745e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.813226e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.591621e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.593677e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.016921e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.003051e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.214155e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.215001e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_m_inl0_hrd0.txt index a65f22b69b..048a838608 100644 --- a/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_pptt_mad/log_pptt_mad_m_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx +make USEBUILDDIR=1 BACKEND=cuda - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -14,8 +14,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' +make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-26_17:57:46 +DATE: 2024-07-29_22:22:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/pp_tt.mad/SubProcesses/P1_uux_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943535] fbridge_mode=0 [UNWEIGHT] Wrote 2424 events (found 5627 events) - [COUNTERS] PROGRAM TOTAL : 0.8198s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8095s - [COUNTERS] Fortran MEs ( 1 ) : 0.0102s for 8192 events => throughput is 8.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7666s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7573s + [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349162943535] fbridge_mode=0 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3790s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3691s - [COUNTERS] Fortran MEs ( 1 ) : 0.0099s for 8192 events => throughput is 8.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3660s + [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.60E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_pptt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819208815] fbridge_mode=0 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6681s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5480s - [COUNTERS] Fortran MEs ( 1 ) : 0.1201s for 106496 events => throughput is 8.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6663s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5440s + [COUNTERS] Fortran MEs ( 1 ) : 0.1223s for 106496 events => throughput is 8.71E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349263543347] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3886s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 8192 events => throughput is 8.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4009s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3901s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 8192 events => throughput is 7.63E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819488869] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.7158s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5831s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1327s for 106496 events => throughput is 8.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7171s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1327s for 106496 events => throughput is 8.02E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.767181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.732280e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.904968e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.997657e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349263543347] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3819s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3752s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3836s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3770s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.25E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819488869] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6739s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5873s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0866s for 106496 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6811s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5944s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0867s for 106496 events => throughput is 1.23E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.116369e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109773e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.137203e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.145557e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349204206499] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3771s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3789s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.56E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819200957] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6518s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0691s for 106496 events => throughput is 1.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6585s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5889s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0695s for 106496 events => throughput is 1.53E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.486990e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.506404e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.531608e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.534327e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349204206499] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3775s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3725s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.84E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819200957] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.6596s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5987s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0609s for 106496 events => throughput is 1.75E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6337s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5728s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0608s for 106496 events => throughput is 1.75E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.682403e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.731971e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.779738e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.757008e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349204206499] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3913s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3830s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_pptt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819200957] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 1.7234s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1141s for 106496 events => throughput is 9.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6927s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1128s for 106496 events => throughput is 9.44E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.976558e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.882527e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.062247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.165670e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_pptt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 63.84 [63.839349179472457] fbridge_mode=1 [UNWEIGHT] Wrote 1620 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8013s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8008s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.53E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8032s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_pptt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 64.33 [64.332743819326211] fbridge_mode=1 [UNWEIGHT] Wrote 2093 events (found 2098 events) - [COUNTERS] PROGRAM TOTAL : 2.0184s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0120s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 106496 events => throughput is 1.68E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9973s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9914s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 106496 events => throughput is 1.80E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.538486e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.961399e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.317529e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.337055e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.439543e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.127651e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.338940e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.347067e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.423551e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.131794e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.421331e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.440642e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.431373e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.122736e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_UUX_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.341506e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.339016e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index cacd0f35d9..17dedf8f20 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -3,20 +3,20 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:40:12 +DATE: 2024-07-30_02:31:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.6435s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3260s - [COUNTERS] Fortran MEs ( 1 ) : 2.3175s for 8192 events => throughput is 3.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5608s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3298s + [COUNTERS] Fortran MEs ( 1 ) : 2.2310s for 8192 events => throughput is 3.67E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3268s - [COUNTERS] Fortran MEs ( 1 ) : 2.2203s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5945s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3317s + [COUNTERS] Fortran MEs ( 1 ) : 2.2628s for 8192 events => throughput is 3.62E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.2382s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7627s - [COUNTERS] Fortran MEs ( 1 ) : 24.4756s for 90112 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.4874s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7889s + [COUNTERS] Fortran MEs ( 1 ) : 24.6985s for 90112 events => throughput is 3.65E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.0170s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6185s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3985s for 8192 events => throughput is 3.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0714s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6544s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4170s for 8192 events => throughput is 3.39E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 30.4436s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0366s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.4070s for 90112 events => throughput is 3.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.5460s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0827s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4633s for 90112 events => throughput is 3.41E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.563022e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515549e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.568130e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489713e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.8190s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5560s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2630s for 8192 events => throughput is 6.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.8906s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6087s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2819s for 8192 events => throughput is 6.39E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.9508s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9389s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.0119s for 90112 events => throughput is 6.43E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.9126s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9986s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.9141s for 90112 events => throughput is 6.48E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.403640e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.645839e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.805258e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.676484e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4145s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5516s for 8192 events => throughput is 1.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4419s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5620s for 8192 events => throughput is 1.46E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.4794s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2982s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.1812s for 90112 events => throughput is 1.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.5900s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3276s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.2624s for 90112 events => throughput is 1.44E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.512239e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.499219e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.514846e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.496015e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.3031s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8025s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5006s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3112s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4972s for 8192 events => throughput is 1.65E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.6774s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2371s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4403s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7455s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2543s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4912s for 90112 events => throughput is 1.64E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.729653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.709963e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.736018e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.701070e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9519s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6302s for 8192 events => throughput is 1.30E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.5942s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9589s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6352s for 8192 events => throughput is 1.29E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.3263s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3416s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.9846s for 90112 events => throughput is 1.29E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.5906s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4160s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.1746s for 90112 events => throughput is 1.26E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.291281e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.304269e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.310298e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.297462e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8138s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7967s + [COUNTERS] PROGRAM TOTAL : 0.8248s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8077s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.79E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.3691s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1886s for 90112 events => throughput is 4.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4256s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2371s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1885s for 90112 events => throughput is 4.78E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.833128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.853072e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.235176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.211489e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.111711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.198591e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.408923e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.410312e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.159800e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.153912e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.418265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.411347e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.120081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.159117e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.756468e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.761919e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 92432a70ab..0a163b543f 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -3,20 +3,20 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:42:44 +DATE: 2024-07-30_02:34:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.5425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3274s - [COUNTERS] Fortran MEs ( 1 ) : 2.2151s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3377s + [COUNTERS] Fortran MEs ( 1 ) : 2.2387s for 8192 events => throughput is 3.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3275s - [COUNTERS] Fortran MEs ( 1 ) : 2.2162s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3288s + [COUNTERS] Fortran MEs ( 1 ) : 2.2477s for 8192 events => throughput is 3.64E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.3201s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7716s - [COUNTERS] Fortran MEs ( 1 ) : 24.5485s for 90112 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.4036s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7651s + [COUNTERS] Fortran MEs ( 1 ) : 24.6386s for 90112 events => throughput is 3.66E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896784952157763E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 4.9557s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5931s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3626s for 8192 events => throughput is 3.47E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.0210s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6310s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3900s for 8192 events => throughput is 3.43E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668138450782073E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 29.8768s - [COUNTERS] Fortran Overhead ( 0 ) : 3.9900s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.8868s for 90112 events => throughput is 3.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.3112s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0777s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.2335s for 90112 events => throughput is 3.44E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.615815e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.580616e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.588760e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.574017e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.7362s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0215s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7147s for 8192 events => throughput is 1.15E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.6361s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9753s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6609s for 8192 events => throughput is 1.24E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.5915s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3718s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.2197s for 90112 events => throughput is 1.25E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.6579s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4193s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.2386s for 90112 events => throughput is 1.24E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.287471e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274459e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.287472e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271876e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8858s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2817s for 8192 events => throughput is 2.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8901s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2820s for 8192 events => throughput is 2.91E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.1146s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9843s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1303s for 90112 events => throughput is 2.88E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1549s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0339s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.1210s for 90112 events => throughput is 2.89E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.952012e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.977524e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.978134e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.857158e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8292s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5726s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2566s for 8192 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8360s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5817s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2543s for 8192 events => throughput is 3.22E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.7825s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9553s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8272s for 90112 events => throughput is 3.19E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9100s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0297s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8803s for 90112 events => throughput is 3.13E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.330203e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.288929e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.291014e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.243275e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9527s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6385s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3142s for 8192 events => throughput is 2.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6474s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3220s for 8192 events => throughput is 2.54E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.5476s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0509s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4967s for 90112 events => throughput is 2.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.7362s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1232s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6130s for 90112 events => throughput is 2.49E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.605632e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.555333e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.608873e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.571496e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896802503195373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8048s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7896s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8103s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7950s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.37E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668190930428073E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.3385s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1681s for 90112 events => throughput is 5.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3787s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2104s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1683s for 90112 events => throughput is 5.35E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.899244e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.897116e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.168943e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.152418e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.330723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.334624e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.347926e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.349713e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.329833e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.333574e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.345608e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.348370e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.313833e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327904e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.679665e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.680664e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 7abb61d6c6..3435ae7ece 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:44:48 +DATE: 2024-07-30_02:36:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.5379s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3249s - [COUNTERS] Fortran MEs ( 1 ) : 2.2131s for 8192 events => throughput is 3.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5682s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3303s + [COUNTERS] Fortran MEs ( 1 ) : 2.2379s for 8192 events => throughput is 3.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5505s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3304s - [COUNTERS] Fortran MEs ( 1 ) : 2.2201s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5662s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3320s + [COUNTERS] Fortran MEs ( 1 ) : 2.2343s for 8192 events => throughput is 3.67E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.2622s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7530s - [COUNTERS] Fortran MEs ( 1 ) : 24.5092s for 90112 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7683s + [COUNTERS] Fortran MEs ( 1 ) : 24.6426s for 90112 events => throughput is 3.66E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 5.0598s - [COUNTERS] Fortran Overhead ( 0 ) : 2.6439s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4159s for 8192 events => throughput is 3.39E+03 events/s + [COUNTERS] PROGRAM TOTAL : 5.1239s + [COUNTERS] Fortran Overhead ( 0 ) : 2.6720s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4519s for 8192 events => throughput is 3.34E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 30.6006s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0492s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.5514s for 90112 events => throughput is 3.39E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.8330s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1491s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.6838s for 90112 events => throughput is 3.38E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.474454e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.521111e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.439517e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.520835e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7397s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5249s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2148s for 8192 events => throughput is 6.74E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7761s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5337s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2424s for 8192 events => throughput is 6.59E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 16.3722s - [COUNTERS] Fortran Overhead ( 0 ) : 2.9270s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.4452s for 90112 events => throughput is 6.70E+03 events/s + [COUNTERS] PROGRAM TOTAL : 16.6726s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9887s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.6839s for 90112 events => throughput is 6.59E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.040827e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.996506e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.981328e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.978045e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.4085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8620s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5465s for 8192 events => throughput is 1.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4406s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8736s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5670s for 8192 events => throughput is 1.44E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.3202s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2754s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.0449s for 90112 events => throughput is 1.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.5129s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3246s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1883s for 90112 events => throughput is 1.46E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.536921e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.511393e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.529532e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.521376e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.2826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4860s for 8192 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3110s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8200s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4910s for 8192 events => throughput is 1.67E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.5114s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1898s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.3216s for 90112 events => throughput is 1.69E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.6171s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2357s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.3814s for 90112 events => throughput is 1.67E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.741888e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.739231e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.760590e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.744026e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5988s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9602s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6386s for 8192 events => throughput is 1.28E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.6199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6462s for 8192 events => throughput is 1.27E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.4068s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3459s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.0609s for 90112 events => throughput is 1.28E+04 events/s + [COUNTERS] PROGRAM TOTAL : 9.5493s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4168s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.1324s for 90112 events => throughput is 1.26E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302056e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.285933e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302590e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.288859e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,8 +513,8 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7907s + [COUNTERS] PROGRAM TOTAL : 0.8155s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7983s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftgg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.5070s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3171s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1899s for 90112 events => throughput is 4.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4117s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2230s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1888s for 90112 events => throughput is 4.77E+05 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814826e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.821541e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.215347e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.175737e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.080489e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.119976e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.380958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.378348e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.077544e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.122456e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.380568e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.377194e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.123241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.167835e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.746541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.750153e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 6e0ebf0fe6..403e014b9f 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -4,8 +4,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:06:30 +DATE: 2024-07-30_02:30:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6347s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6253s - [COUNTERS] Fortran MEs ( 1 ) : 0.0093s for 8192 events => throughput is 8.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6344s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6257s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.39E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s - [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3795s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3708s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.44E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3840s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2894s - [COUNTERS] Fortran MEs ( 1 ) : 0.0946s for 90112 events => throughput is 9.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3839s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2878s + [COUNTERS] Fortran MEs ( 1 ) : 0.0961s for 90112 events => throughput is 9.38E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3899s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3908s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3828s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3567s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0886s for 90112 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3873s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2979s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0893s for 90112 events => throughput is 1.01E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.005595e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.877609e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.012216e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008589e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3708s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3777s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3734s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.92E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3147s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2675s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0472s for 90112 events => throughput is 1.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3473s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2991s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0482s for 90112 events => throughput is 1.87E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.909460e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.916899e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.971415e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991888e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3778s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3753s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2978s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0299s for 90112 events => throughput is 3.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2964s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0300s for 90112 events => throughput is 3.00E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.080442e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.196233e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.074278e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.292775e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3758s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.34E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0286s for 90112 events => throughput is 3.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3259s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2978s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0281s for 90112 events => throughput is 3.20E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.360999e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.232019e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517113e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.441266e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3742s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0304s for 90112 events => throughput is 2.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3189s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 90112 events => throughput is 2.90E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.892371e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.873947e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.222820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.009395e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869280] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8041s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8036s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.64E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8012s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8007s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.65E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384401] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7080s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7028s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.75E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7326s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7277s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.82E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.783715e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.031985e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.058188e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027673e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.170829e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.204426e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.604074e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.619900e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.147861e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.251475e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.994548e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.980723e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.155905e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176577e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.319425e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.319715e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 0993fdcc1c..ef4df5280d 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone - +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:06:54 +DATE: 2024-07-30_02:31:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6621s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6530s - [COUNTERS] Fortran MEs ( 1 ) : 0.0091s for 8192 events => throughput is 9.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6453s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6358s + [COUNTERS] Fortran MEs ( 1 ) : 0.0095s for 8192 events => throughput is 8.62E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3966s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3872s - [COUNTERS] Fortran MEs ( 1 ) : 0.0094s for 8192 events => throughput is 8.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3785s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3698s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.38E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3966s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3008s - [COUNTERS] Fortran MEs ( 1 ) : 0.0958s for 90112 events => throughput is 9.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3879s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2907s + [COUNTERS] Fortran MEs ( 1 ) : 0.0972s for 90112 events => throughput is 9.27E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021439979276] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3949s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3866s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.91E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550550786874] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3681s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3870s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2977s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0893s for 90112 events => throughput is 1.01E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.015948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.019572e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.021671e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.019641e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021343761686] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3741s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3760s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3734s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.12E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550488814170] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3081s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 90112 events => throughput is 3.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3162s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2880s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0282s for 90112 events => throughput is 3.20E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.304389e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.241742e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.380080e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489875e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3710s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.90E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3721s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.16E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2810s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2620s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0191s for 90112 events => throughput is 4.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3127s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2933s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0194s for 90112 events => throughput is 4.64E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.883752e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.046955e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.254694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.155866e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3739s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3722s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.74E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3723s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3708s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0016s for 8192 events => throughput is 5.16E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3493s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3299s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0195s for 90112 events => throughput is 4.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3061s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2878s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0183s for 90112 events => throughput is 4.92E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.263107e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263280e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.596148e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.614212e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156021917867366] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3804s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.27E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098551029624061] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.2838s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 90112 events => throughput is 4.30E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3225s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3009s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 90112 events => throughput is 4.16E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.410618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.356789e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.579934e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.820638e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156022290359153] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8021s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8016s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.58E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8031s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8026s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098551341908548] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7042s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6994s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.90E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7224s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.92E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.004673e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.447871e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.352806e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.374493e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.848697e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.745966e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.574226e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.576920e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.876693e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.835109e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.655828e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.649581e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.468075e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.010304e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.699037e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.674061e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index f51812e183..604aa1925b 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-07-23_17:07:18 +DATE: 2024-07-30_02:31:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6364s + [COUNTERS] PROGRAM TOTAL : 0.6361s [COUNTERS] Fortran Overhead ( 0 ) : 0.6273s - [COUNTERS] Fortran MEs ( 1 ) : 0.0091s for 8192 events => throughput is 8.96E+05 events/s + [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.33E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3884s - [COUNTERS] Fortran MEs ( 1 ) : 0.0096s for 8192 events => throughput is 8.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3818s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3728s + [COUNTERS] Fortran MEs ( 1 ) : 0.0090s for 8192 events => throughput is 9.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4071s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3113s - [COUNTERS] Fortran MEs ( 1 ) : 0.0958s for 90112 events => throughput is 9.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3916s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2951s + [COUNTERS] Fortran MEs ( 1 ) : 0.0965s for 90112 events => throughput is 9.33E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4678s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.44E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3664s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3913s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2999s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0913s for 90112 events => throughput is 9.87E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.803105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001450e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.833012e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010990e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3782s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.86E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3783s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.81E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0466s for 90112 events => throughput is 1.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3354s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2885s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0469s for 90112 events => throughput is 1.92E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.966382e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.971945e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.002002e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.931947e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3795s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.76E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3723s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.02E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3017s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 90112 events => throughput is 3.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3185s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2902s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0282s for 90112 events => throughput is 3.19E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.113986e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.227249e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.514629e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428529e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3847s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3803s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.26E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3019s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3190s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2916s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0274s for 90112 events => throughput is 3.28E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.470466e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.306583e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.587854e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.603686e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3703s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.87E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3732s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.06E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1 [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 90112 events => throughput is 2.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3204s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2890s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 90112 events => throughput is 2.87E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.935388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.911291e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.012886e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.272088e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3116 [0.31156027194560187] fbridge_mode=1 [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8059s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8054s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.56E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8021s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.62E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.311 [0.31098556243340819] fbridge_mode=1 [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6952s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.78E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7215s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7165s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.80E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.612145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.075202e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.945612e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.006859e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.146137e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.218316e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.463844e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.512409e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.146854e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.219132e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.862724e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.838578e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.162271e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.232904e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.282910e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292140e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 679246dd46..ccc8fb666c 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:38:40 +DATE: 2024-07-30_02:29:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7777s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7362s - [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7445s + [COUNTERS] Fortran MEs ( 1 ) : 0.0425s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3565s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3980s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3557s + [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6909s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2365s - [COUNTERS] Fortran MEs ( 1 ) : 0.4544s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6859s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2259s + [COUNTERS] Fortran MEs ( 1 ) : 0.4600s for 90112 events => throughput is 1.96E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419863] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4030s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4461s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7824s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3031s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7776s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2905s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4871s for 90112 events => throughput is 1.85E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.924806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.843019e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.912421e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.891919e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4066s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4090s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3849s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.39E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5413s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2663s for 90112 events => throughput is 3.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5183s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2466s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2717s for 90112 events => throughput is 3.32E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.314362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.296496e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.440069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333327e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3874s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3723s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3882s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3730s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.39E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4296s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1637s for 90112 events => throughput is 5.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4057s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2394s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1663s for 90112 events => throughput is 5.42E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.062177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.389457e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.382563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.441796e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3976s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3856s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.95E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1505s for 90112 events => throughput is 5.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3926s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2420s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1506s for 90112 events => throughput is 5.98E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.763603e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.837024e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.871042e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.750073e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3996s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3785s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3999s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3789s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.89E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5325s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2948s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2377s for 90112 events => throughput is 3.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4787s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2399s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2388s for 90112 events => throughput is 3.77E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.502625e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.628640e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.556287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.742525e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8405s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.44E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,8 +546,8 @@ Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6883s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6820s + [COUNTERS] PROGRAM TOTAL : 1.6567s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6504s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.42E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.919891e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.075763e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.630666e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.680250e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.869333e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.331343e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082168e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078661e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898237e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333610e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.164202e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.157528e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.905759e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.326982e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.090826e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.065535e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 9e00b5e78a..7ea9b14dde 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,14 +13,14 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:39:07 +DATE: 2024-07-30_02:29:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7412s - [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7430s + [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3540s - [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4019s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3598s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6850s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2308s - [COUNTERS] Fortran MEs ( 1 ) : 0.4541s for 90112 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6770s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2218s + [COUNTERS] Fortran MEs ( 1 ) : 0.4552s for 90112 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -133,9 +133,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598853620719339] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3996s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4413s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4010s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0402s for 8192 events => throughput is 2.04E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577522280119403] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7388s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4454s for 90112 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7143s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2638s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4505s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.042800e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.005730e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.015984e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.011227e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598849697851406] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3779s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0165s for 8192 events => throughput is 4.97E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518590213366] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4390s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2600s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 90112 events => throughput is 5.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4191s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1829s for 90112 events => throughput is 4.93E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.765299e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.849548e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.747967e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.835113e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 8192 events => throughput is 9.55E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3503s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0942s for 90112 events => throughput is 9.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3255s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2289s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0966s for 90112 events => throughput is 9.33E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.065332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.237132e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.552613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.288560e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,8 +361,8 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3750s + [COUNTERS] PROGRAM TOTAL : 0.4638s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4557s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3419s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2529s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0890s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3323s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2406s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0917s for 90112 events => throughput is 9.83E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.815596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.779781e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.012011e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.967723e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598854350242270] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3696s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0110s for 8192 events => throughput is 7.44E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577522751628507] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3921s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2698s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1223s for 90112 events => throughput is 7.37E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4368s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3025s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1344s for 90112 events => throughput is 6.71E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.916519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.814887e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.893526e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.923034e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598870301426373] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7885s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.59E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.8127s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8122s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577527268256027] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6767s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 90112 events => throughput is 1.65E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6598s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6543s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 90112 events => throughput is 1.64E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.812781e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.088081e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.214634e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.144414e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.018876e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.139206e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.400207e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.394362e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.038902e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.055443e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.501990e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.544827e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.639781e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.613034e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.489136e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.451142e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index e096eb78b5..293b5c3843 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -18,8 +18,8 @@ make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-06-29_01:39:32 +DATE: 2024-07-30_02:30:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.7859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7445s - [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7797s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7378s + [COUNTERS] Fortran MEs ( 1 ) : 0.0419s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3977s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3560s - [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s + [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,8 +108,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6901s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2335s + [COUNTERS] PROGRAM TOTAL : 1.6830s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2264s [COUNTERS] Fortran MEs ( 1 ) : 0.4566s for 90112 events => throughput is 1.97E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** @@ -133,9 +133,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4017s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4463s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4028s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0435s for 8192 events => throughput is 1.88E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -166,9 +166,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525144126803] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7736s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2953s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4783s for 90112 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7431s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2617s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4814s for 90112 events => throughput is 1.87E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -181,12 +181,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.906547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.837246e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.885100e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882771e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,9 +209,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4069s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3833s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3867s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -242,9 +242,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525144126810] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5337s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2626s for 90112 events => throughput is 3.43E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4988s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2366s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2622s for 90112 events => throughput is 3.44E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -257,12 +257,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.321004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.326977e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.457355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.385998e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,9 +285,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3969s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3899s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3747s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -318,9 +318,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4292s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2669s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1623s for 90112 events => throughput is 5.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3995s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2371s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1624s for 90112 events => throughput is 5.55E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -333,12 +333,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.432194e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.309848e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.311023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.292570e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -361,9 +361,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3721s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.86E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -394,9 +394,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1477s for 90112 events => throughput is 6.10E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3822s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2318s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1504s for 90112 events => throughput is 5.99E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -409,12 +409,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.922477e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.749917e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.992472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.559442e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,9 +437,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3836s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.95E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -470,9 +470,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4944s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2251s for 90112 events => throughput is 4.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4838s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2304s for 90112 events => throughput is 3.91E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -485,12 +485,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.827539e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.817653e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.759224e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.793065e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -513,9 +513,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.6 [44.598860056955807] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7919s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.32E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -546,9 +546,9 @@ Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.58 [44.577523872560512] fbridge_mode=1 [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6927s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6862s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6623s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6558s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.40E+07 events/s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -561,42 +561,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.009355e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.039841e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.608692e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.630395e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.895025e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.356089e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.063290e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.058096e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.880064e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328490e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.136844e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134474e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.885288e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.330105e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.014417e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991822e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***