[june24] regenerate gg_tt.mad, after including CODEGEN fixes from cla…

…ng PR madgraph5#905, constexpr_math.h PR madgraph5#908 and runTest/cudaDeviceReset PR madgraph5#909. Add valgrind.h and its symlink in the repo for gg_tt.mad. The new runTest.cc template now has a (commented out) proof of concept for including two tests (with/without multichannel) madgraph5#896; I will resume from there. After building bldall, the following succeeds: `for bck in none sse4 avx2 512y 512z cuda; do echo $bck; ./build.${bck}_d_inl0_hrd0/runTest_*.exe; done`. This instead is crashing (again?) for some AVX values: `for bck in none sse4 avx2 512y 512z cuda; do echo $bck; valgrind ./build.${bck}_d_inl0_hrd0/runTest_*.exe; done`. On closer inspection, this is because valgrind does not support AVX512, so this is ok.
valassi · Jul 12, 2024 · d505178 · d505178
1 parent b661950
commit d505178
Show file tree

Hide file tree

Showing 9 changed files with 7,458 additions and 131 deletions.
diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
@@ -62,7 +62,7 @@ generate g g > t t~
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.006100893020629883 [0m
+[1;32mDEBUG: model prefixing  takes 0.005627632141113281 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
 INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED 
 INFO: Trying process: g g > t t~ WEIGHTED<=2 @1  
 INFO: Process has 3 diagrams 
-1 processes with 3 diagrams generated in 0.009 s
+1 processes with 3 diagrams generated in 0.008 s
 Total: 1 processes with 3 diagrams
 output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
 Load PLUGIN.CUDACPP_OUTPUT
@@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1
 INFO: Processing color information for process: g g > t t~ @1 
 INFO: Creating files in directory P1_gg_ttx 
 [1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1151][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f481bdc5070> [1;30m[export_v4.py at line 6304][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2907ce3f70> [1;30m[export_v4.py at line 6304][0m [0m
 INFO: Creating files in directory . 
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
 FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
@@ -205,12 +205,12 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
 [1;32mDEBUG:  split[i] = [0m {false,true};}; [1;30m[model_handling.py at line 1584][0m [0m
 [1;32mDEBUG:  split[i] = [0m     {false, true};}; [1;30m[model_handling.py at line 1586][0m [0m
 [1;32mDEBUG:  split[i] = [0m     {false, true}, // iconfigC=2, diag=3 [1;30m[model_handling.py at line 1591][0m [0m
-Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s
-Wrote files for 10 helas calls in 0.128 s
+Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
+Wrote files for 10 helas calls in 0.124 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
 ALOHA: aloha creates FFV1 routines[0m
-ALOHA: aloha creates 2 routines in  0.149 s
+ALOHA: aloha creates 2 routines in  0.146 s
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.convert_model (create the model) [1;30m[output.py at line 208][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates VVV1 set of routines with options: P0[0m
@@ -256,9 +256,9 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m2.452s
-user	0m1.890s
-sys	0m0.311s
+real	0m1.947s
+user	0m1.670s
+sys	0m0.273s
 Code generation completed in 2 seconds
 ************************************************************
 *                                                          *

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MadgraphTest.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/MadgraphTest.h
@@ -97,29 +97,6 @@ namespace
  * Users need to implement:
  * - Functions to retrieve matrix element and 4-momenta. These are used in the tests.
  * - Driver functions that run the madgraph workflow.
- *
- * Usage:
- * ```
- * class TestImplementation : public TestDriverBase {
- *   <override all pure-virtual functions with Madgraph workflow>
- * }
- *
- * class TestImplementation2 : public TestDriverBase {
- *   <override all pure-virtual functions with a different Madgraph workflow>
- * }
- *
- * INSTANTIATE_TEST_SUITE_P( TestName,
- *                           MadgraphTest,
- *                           testing::Values( new TestImplementation, new TestImplementation2, ... ) );
- *```
- *
- * For adapting the test workflow, see the .cc and adapt
- *   TEST_P(MadgraphTest, CompareMomentaAndME)
- *
- * To add a test that should be runnable with all test implementations that derive from TestDriverBase, add a new
- *   TEST_P(MadgraphTest, <TestName>) {
- *     <test code>
- *   }
  */
 class TestDriverBase
 {
@@ -184,34 +161,20 @@ class TestDriverBase
 
 /**
  * Test class that's defining all tests to run with a Madgraph workflow.
- * The tests are defined below using TEST_P.
- * Instantiate them using:
- * ```
- * INSTANTIATE_TEST_SUITE_P( TestName,
- *                           MadgraphTest,
- *                           testing::Values( new TestImplementation, new TestImplementation2, ... ) );
- * ```
  */
-class MadgraphTest : public testing::TestWithParam<TestDriverBase*>
+class MadgraphTest
 {
-protected:
-  std::unique_ptr<TestDriverBase> testDriver;
-
-  MadgraphTest()
-    : TestWithParam(), testDriver( GetParam() )
-  {
-  }
+public:
+  MadgraphTest( TestDriverBase& testDriverRef )
+    : testDriver( &testDriverRef ) {}
+  ~MadgraphTest() {}
+  void CompareMomentaAndME( testing::Test& googleTest ) const; // NB: googleTest is ONLY needed for the HasFailure method...
+private:
+  TestDriverBase* testDriver; // non-owning pointer
 };
 
-// WARNING: before the split of C++ and CUDA builds, both CPU and GPU tests were linked together into the same executable;
-// it was therefore necessary to prevent multiply-defined symbols by only compiling this when "#ifndef MGONGPUCPP_GPUIMPL";
-// now that runTest.exe only contains either CPU or GPU tests, this is no longer necessary!
-//#ifndef MGONGPUCPP_GPUIMPL
-
-/// Compare momenta and matrix elements.
-/// This uses an implementation of TestDriverBase to run a madgraph workflow,
-/// and compares momenta and matrix elements with a reference file.
-TEST_P( MadgraphTest, CompareMomentaAndME )
+void
+MadgraphTest::CompareMomentaAndME( testing::Test& googleTest ) const
 {
   const fptype toleranceMomenta = std::is_same<double, fptype>::value ? 1.E-10 : 4.E-2; // see #735
 #ifdef __APPLE__
@@ -244,7 +207,7 @@ TEST_P( MadgraphTest, CompareMomentaAndME )
   {
     referenceData = readReferenceData( refFileName );
   }
-  ASSERT_FALSE( HasFailure() ); // It doesn't make any sense to continue if we couldn't read the reference file.
+  ASSERT_FALSE( googleTest.HasFailure() ); // It doesn't make any sense to continue if we couldn't read the reference file.
   // **************************************
   // *** START MAIN LOOP ON #ITERATIONS ***
   // **************************************
@@ -254,7 +217,7 @@ TEST_P( MadgraphTest, CompareMomentaAndME )
     testDriver->prepareMomenta( energy );
     testDriver->runSigmaKin( iiter );
     // --- Run checks on all events produced in this iteration
-    for( std::size_t ievt = 0; ievt < testDriver->nevt && !HasFailure(); ++ievt )
+    for( std::size_t ievt = 0; ievt < testDriver->nevt && !googleTest.HasFailure(); ++ievt )
     {
       if( dumpEvents )
       {
@@ -321,9 +284,4 @@ TEST_P( MadgraphTest, CompareMomentaAndME )
   }
 }
 
-// WARNING: before the split of C++ and CUDA builds, both CPU and GPU tests were linked together into the same executable;
-// it was therefore necessary to prevent multiply-defined symbols by only compiling this when "#ifndef MGONGPUCPP_GPUIMPL";
-// now that runTest.exe only contains either CPU or GPU tests, this is no longer necessary!
-//#endif // MGONGPUCPP_GPUIMPL
-
 #endif /* MADGRAPHTEST_H_ */
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/valgrind.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/valgrind.h
@@ -0,0 +1 @@
+../valgrind.h
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
@@ -378,6 +378,10 @@ ifeq ($(USEOPENMP),1)
   else ifneq ($(shell $(CXX) --version | egrep '^Intel'),)
     override OMPFLAGS = -fopenmp
     ###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578)
+  else ifneq ($(shell $(CXX) --version | egrep '^clang version 16'),)
+    override OMPFLAGS = # disable OpenMP on clang16 #904
+  else ifneq ($(shell $(CXX) --version | egrep '^clang version 17'),)
+    override OMPFLAGS = # disable OpenMP on clang17 #904
   else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),)
     override OMPFLAGS = -fopenmp
     ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578)
@@ -640,11 +644,21 @@ endif
 
 # Target (and build options): debug
 MAKEDEBUG=
-debug: OPTFLAGS   = -g -O0
+debug: OPTFLAGS = -g -O0
 debug: CUDA_OPTFLAGS = -G
 debug: MAKEDEBUG := debug
 debug: all.$(TAG)
 
+# Target (and build options): address sanitizer #207
+###CXXLIBFLAGSASAN =
+###GPULIBFLAGSASAN =
+###asan: OPTFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer
+###asan: CUDA_OPTFLAGS = -G $(XCOMPILERFLAG) -fsanitize=address $(XCOMPILERFLAG) -fno-omit-frame-pointer
+###asan: CXXLIBFLAGSASAN = -fsanitize=address
+###asan: GPULIBFLAGSASAN = -Xlinker -fsanitize=address -Xlinker -shared
+###asan: MAKEDEBUG := debug
+###asan: all.$(TAG)
+
 # Target: tag-specific build lockfiles
 override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi`
 $(BUILDDIR)/.build.$(TAG):
@@ -765,11 +779,13 @@ endif
 #-------------------------------------------------------------------------------
 
 # Target (and build rules): C++ and CUDA/HIP standalone executables
+###$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSASAN)
 $(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o
 	$(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS)
 
 ifneq ($(GPUCC),)
+###$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 ifneq ($(shell $(CXX) --version | grep ^Intel),)
 $(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy')
 $(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9')
@@ -784,9 +800,11 @@ endif
 #-------------------------------------------------------------------------------
 
 # Generic target and build rules: objects from Fortran compilation
+# (NB In this makefile, this only applies to fcheck_sa_fortran.o)
+# (NB -fPIC was added to fix clang16 build #904, but this seems better for other cases too and is consistent to c++ and cuda builds)
 $(BUILDDIR)/%_fortran.o : %.f *.inc
 	@if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi
-	$(FC) -I. -c $< -o $@
+	$(FC) -I. -fPIC -c $< -o $@
 
 # Generic target and build rules: objects from Fortran compilation
 ###$(BUILDDIR)/%_fortran.o : %.f *.inc
@@ -797,6 +815,7 @@ $(BUILDDIR)/%_fortran.o : %.f *.inc
 # Target (and build rules): Fortran standalone executables
 ###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc
 
+###$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSASAN)
 ifeq ($(UNAME_S),Darwin)
 $(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375
 endif
@@ -809,6 +828,7 @@ else
 endif
 
 ifneq ($(GPUCC),)
+###$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 ifneq ($(shell $(CXX) --version | grep ^Intel),)
 $(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy')
 $(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9')
@@ -911,10 +931,12 @@ endif
 ###endif
 
 ifeq ($(GPUCC),) # link only runTest_cpp.o
+###$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSASAN)
 $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
 	$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
 else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
+###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
 $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
 $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
 ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802

diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/runTest.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/runTest.cc
@@ -126,16 +126,6 @@ struct CPUTest : public CUDA_CPU_TestBase
 #ifdef MGONGPUCPP_GPUIMPL
 struct CUDATest : public CUDA_CPU_TestBase
 {
-  // Reset the device when our test goes out of scope. Note that this should happen after
-  // the frees, i.e. be declared before the pointers to device memory.
-  struct DeviceReset
-  {
-    ~DeviceReset()
-    {
-      checkGpu( gpuDeviceReset() ); // this is needed by cuda-memcheck --leak-check full
-    }
-  } deviceResetter;
-
   // Struct data members (process, and memory structures for random numbers, momenta, matrix elements and weights on host and device)
   // [NB the hst/dev memory arrays must be initialised in the constructor, see issue #290]
   CPPProcess process;
@@ -250,24 +240,39 @@ struct CUDATest : public CUDA_CPU_TestBase
 };
 #endif /* clang-format off */
 
-// Use two levels of macros to force stringification at the right level
-// (see https://gcc.gnu.org/onlinedocs/gcc-3.0.1/cpp_3.html#SEC17 and https://stackoverflow.com/a/3419392)
-// Google macro is in https://github.com/google/googletest/blob/master/googletest/include/gtest/gtest-param-test.h
-#define TESTID_CPU( s ) s##_CPU
-#define XTESTID_CPU( s ) TESTID_CPU( s )
-#define MG_INSTANTIATE_TEST_SUITE_CPU( prefix, test_suite_name ) \
-INSTANTIATE_TEST_SUITE_P( prefix, \
-                          test_suite_name, \
-                          testing::Values( new CPUTest( MG_EPOCH_REFERENCE_FILE_NAME ) ) );
-#define TESTID_GPU( s ) s##_GPU
-#define XTESTID_GPU( s ) TESTID_GPU( s )
-#define MG_INSTANTIATE_TEST_SUITE_GPU( prefix, test_suite_name ) \
-INSTANTIATE_TEST_SUITE_P( prefix, \
-                          test_suite_name, \
-                          testing::Values( new CUDATest( MG_EPOCH_REFERENCE_FILE_NAME ) ) );
-
+// AV July 2024 much simpler class structure without the presently-unnecessary googletest templates
+// This is meant as a workaround to prevent not-understood segfault #907 when adding a second test
 #ifdef MGONGPUCPP_GPUIMPL
-MG_INSTANTIATE_TEST_SUITE_GPU( XTESTID_GPU( MG_EPOCH_PROCESS_ID ), MadgraphTest );
+// CUDA test 1
+CUDATest cudaDriver1( MG_EPOCH_REFERENCE_FILE_NAME );
+MadgraphTest mgTest1( cudaDriver1 );
+#define TESTID1( s ) s##_GPU_MADGRAPH1
+#define XTESTID1( s ) TESTID1( s )
+// CUDA test 2
+//CUDATest cudaDriver2( MG_EPOCH_REFERENCE_FILE_NAME );
+//MadgraphTest mgTest2( cudaDriver2 );
+//#define TESTID2( s ) s##_GPU_MADGRAPH2
+//#define XTESTID2( s ) TESTID2( s )
 #else
-MG_INSTANTIATE_TEST_SUITE_CPU( XTESTID_CPU( MG_EPOCH_PROCESS_ID ), MadgraphTest );
-#endif /* clang-format on */
+// CPU test 1
+CPUTest cppDriver1( MG_EPOCH_REFERENCE_FILE_NAME );
+MadgraphTest mgTest1( cppDriver1 );
+#define TESTID1( s ) s##_CPU_MADGRAPH1
+#define XTESTID1( s ) TESTID1( s )
+// CPU test 2
+//CPUTest cppDriver2( MG_EPOCH_REFERENCE_FILE_NAME );
+//MadgraphTest mgTest2( cppDriver2 );
+//#define TESTID2( s ) s##_CPU_MADGRAPH2
+//#define XTESTID2( s ) TESTID2( s )
+#endif
+// Instantiate Google test 1
+TEST( XTESTID1( MG_EPOCH_PROCESS_ID ), compareMomAndME )
+{
+  mgTest1.CompareMomentaAndME( *this );
+}
+// Instantiate Google test 2
+//TEST( XTESTID2( MG_EPOCH_PROCESS_ID ), compareMomAndME )
+//{
+//  mgTest2.CompareMomentaAndME( *this );
+//}
+/* clang-format on */