Skip to content

Commit

Permalink
[june24] regenerate gg_tt.mad, after including CODEGEN fixes from cla…
Browse files Browse the repository at this point in the history
…ng PR madgraph5#905, constexpr_math.h PR madgraph5#908 and runTest/cudaDeviceReset PR madgraph5#909

Add valgrind.h and its symlink in the repo for gg_tt.mad

The new runTest.cc template now contains a (commented-out) proof of concept for including two tests (with/without multichannel), see madgraph5#896; I will resume from there

After building bldall, the following succeeds
for bck in none sse4 avx2 512y 512z cuda; do echo $bck; ./build.${bck}_d_inl0_hrd0/runTest_*.exe; done

The following, instead, crashes (again?) for some of the AVX backends
for bck in none sse4 avx2 512y 512z cuda; do echo $bck; valgrind ./build.${bck}_d_inl0_hrd0/runTest_*.exe; done
On closer inspection, this is because valgrind does not support AVX512, so these crashes are expected and are not a problem in the code
  • Loading branch information
valassi committed Jul 12, 2024
1 parent b661950 commit d505178
Show file tree
Hide file tree
Showing 9 changed files with 7,458 additions and 131 deletions.
18 changes: 9 additions & 9 deletions epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ generate g g > t t~
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.006100893020629883 
DEBUG: model prefixing takes 0.005627632141113281 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step.
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED
INFO: Trying process: g g > t t~ WEIGHTED<=2 @1
INFO: Process has 3 diagrams
1 processes with 3 diagrams generated in 0.009 s
1 processes with 3 diagrams generated in 0.008 s
Total: 1 processes with 3 diagrams
output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32
Load PLUGIN.CUDACPP_OUTPUT
Expand All @@ -178,7 +178,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1
INFO: Processing color information for process: g g > t t~ @1
INFO: Creating files in directory P1_gg_ttx
DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1151] 
DEBUG: process_exporter_cpp =  <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f481bdc5070> [export_v4.py at line 6304] 
DEBUG: process_exporter_cpp =  <PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f2907ce3f70> [export_v4.py at line 6304] 
INFO: Creating files in directory .
FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.h
FileWriter <class 'PLUGIN.CUDACPP_OUTPUT.model_handling.PLUGIN_CPPWriter'> for ././CPPProcess.cc
Expand All @@ -205,12 +205,12 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx
DEBUG: split[i] =  {false,true};}; [model_handling.py at line 1584] 
DEBUG: split[i] =  {false, true};}; [model_handling.py at line 1586] 
DEBUG: split[i] =  {false, true}, // iconfigC=2, diag=3 [model_handling.py at line 1591] 
Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s
Wrote files for 10 helas calls in 0.128 s
Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s
Wrote files for 10 helas calls in 0.124 s
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates VVV1 set of routines with options: P0
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates 2 routines in 0.149 s
ALOHA: aloha creates 2 routines in 0.146 s
DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 208] 
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates VVV1 set of routines with options: P0
Expand Down Expand Up @@ -256,9 +256,9 @@ Type "launch" to generate events from this process, or see
Run "open index.html" to see more information about this process.
quit

real 0m2.452s
user 0m1.890s
sys 0m0.311s
real 0m1.947s
user 0m1.670s
sys 0m0.273s
Code generation completed in 2 seconds
************************************************************
* *
Expand Down
66 changes: 12 additions & 54 deletions epochX/cudacpp/gg_tt.mad/SubProcesses/MadgraphTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,29 +97,6 @@ namespace
* Users need to implement:
* - Functions to retrieve matrix element and 4-momenta. These are used in the tests.
* - Driver functions that run the madgraph workflow.
*
* Usage:
* ```
* class TestImplementation : public TestDriverBase {
* <override all pure-virtual functions with Madgraph workflow>
* }
*
* class TestImplementation2 : public TestDriverBase {
* <override all pure-virtual functions with a different Madgraph workflow>
* }
*
* INSTANTIATE_TEST_SUITE_P( TestName,
* MadgraphTest,
* testing::Values( new TestImplementation, new TestImplementation2, ... ) );
*```
*
* For adapting the test workflow, see the .cc and adapt
* TEST_P(MadgraphTest, CompareMomentaAndME)
*
* To add a test that should be runnable with all test implementations that derive from TestDriverBase, add a new
* TEST_P(MadgraphTest, <TestName>) {
* <test code>
* }
*/
class TestDriverBase
{
Expand Down Expand Up @@ -184,34 +161,20 @@ class TestDriverBase

/**
* Test class that's defining all tests to run with a Madgraph workflow.
* The tests are defined below using TEST_P.
* Instantiate them using:
* ```
* INSTANTIATE_TEST_SUITE_P( TestName,
* MadgraphTest,
* testing::Values( new TestImplementation, new TestImplementation2, ... ) );
* ```
*/
class MadgraphTest : public testing::TestWithParam<TestDriverBase*>
class MadgraphTest
{
protected:
std::unique_ptr<TestDriverBase> testDriver;

MadgraphTest()
: TestWithParam(), testDriver( GetParam() )
{
}
public:
MadgraphTest( TestDriverBase& testDriverRef )
: testDriver( &testDriverRef ) {}
~MadgraphTest() {}
void CompareMomentaAndME( testing::Test& googleTest ) const; // NB: googleTest is ONLY needed for the HasFailure method...
private:
TestDriverBase* testDriver; // non-owning pointer
};

// WARNING: before the split of C++ and CUDA builds, both CPU and GPU tests were linked together into the same executable;
// it was therefore necessary to prevent multiply-defined symbols by only compiling this when "#ifndef MGONGPUCPP_GPUIMPL";
// now that runTest.exe only contains either CPU or GPU tests, this is no longer necessary!
//#ifndef MGONGPUCPP_GPUIMPL

/// Compare momenta and matrix elements.
/// This uses an implementation of TestDriverBase to run a madgraph workflow,
/// and compares momenta and matrix elements with a reference file.
TEST_P( MadgraphTest, CompareMomentaAndME )
void
MadgraphTest::CompareMomentaAndME( testing::Test& googleTest ) const
{
const fptype toleranceMomenta = std::is_same<double, fptype>::value ? 1.E-10 : 4.E-2; // see #735
#ifdef __APPLE__
Expand Down Expand Up @@ -244,7 +207,7 @@ TEST_P( MadgraphTest, CompareMomentaAndME )
{
referenceData = readReferenceData( refFileName );
}
ASSERT_FALSE( HasFailure() ); // It doesn't make any sense to continue if we couldn't read the reference file.
ASSERT_FALSE( googleTest.HasFailure() ); // It doesn't make any sense to continue if we couldn't read the reference file.
// **************************************
// *** START MAIN LOOP ON #ITERATIONS ***
// **************************************
Expand All @@ -254,7 +217,7 @@ TEST_P( MadgraphTest, CompareMomentaAndME )
testDriver->prepareMomenta( energy );
testDriver->runSigmaKin( iiter );
// --- Run checks on all events produced in this iteration
for( std::size_t ievt = 0; ievt < testDriver->nevt && !HasFailure(); ++ievt )
for( std::size_t ievt = 0; ievt < testDriver->nevt && !googleTest.HasFailure(); ++ievt )
{
if( dumpEvents )
{
Expand Down Expand Up @@ -321,9 +284,4 @@ TEST_P( MadgraphTest, CompareMomentaAndME )
}
}

// WARNING: before the split of C++ and CUDA builds, both CPU and GPU tests were linked together into the same executable;
// it was therefore necessary to prevent multiply-defined symbols by only compiling this when "#ifndef MGONGPUCPP_GPUIMPL";
// now that runTest.exe only contains either CPU or GPU tests, this is no longer necessary!
//#endif // MGONGPUCPP_GPUIMPL

#endif /* MADGRAPHTEST_H_ */
1 change: 1 addition & 0 deletions epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/valgrind.h
26 changes: 24 additions & 2 deletions epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,10 @@ ifeq ($(USEOPENMP),1)
else ifneq ($(shell $(CXX) --version | egrep '^Intel'),)
override OMPFLAGS = -fopenmp
###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578)
else ifneq ($(shell $(CXX) --version | egrep '^clang version 16'),)
override OMPFLAGS = # disable OpenMP on clang16 #904
else ifneq ($(shell $(CXX) --version | egrep '^clang version 17'),)
override OMPFLAGS = # disable OpenMP on clang17 #904
else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),)
override OMPFLAGS = -fopenmp
###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578)
Expand Down Expand Up @@ -640,11 +644,21 @@ endif

# Target (and build options): debug
MAKEDEBUG=
debug: OPTFLAGS = -g -O0
debug: OPTFLAGS = -g -O0
debug: CUDA_OPTFLAGS = -G
debug: MAKEDEBUG := debug
debug: all.$(TAG)

# Target (and build options): address sanitizer #207
###CXXLIBFLAGSASAN =
###GPULIBFLAGSASAN =
###asan: OPTFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer
###asan: CUDA_OPTFLAGS = -G $(XCOMPILERFLAG) -fsanitize=address $(XCOMPILERFLAG) -fno-omit-frame-pointer
###asan: CXXLIBFLAGSASAN = -fsanitize=address
###asan: GPULIBFLAGSASAN = -Xlinker -fsanitize=address -Xlinker -shared
###asan: MAKEDEBUG := debug
###asan: all.$(TAG)

# Target: tag-specific build lockfiles
override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi`
$(BUILDDIR)/.build.$(TAG):
Expand Down Expand Up @@ -765,11 +779,13 @@ endif
#-------------------------------------------------------------------------------

# Target (and build rules): C++ and CUDA/HIP standalone executables
###$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSASAN)
$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o
$(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS)

ifneq ($(GPUCC),)
###$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSASAN)
ifneq ($(shell $(CXX) --version | grep ^Intel),)
$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy')
$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9')
Expand All @@ -784,9 +800,11 @@ endif
#-------------------------------------------------------------------------------

# Generic target and build rules: objects from Fortran compilation
# (NB In this makefile, this only applies to fcheck_sa_fortran.o)
# (NB -fPIC was added to fix the clang16 build #904, but this seems better for other cases too and is consistent with the c++ and cuda builds)
$(BUILDDIR)/%_fortran.o : %.f *.inc
@if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi
$(FC) -I. -c $< -o $@
$(FC) -I. -fPIC -c $< -o $@

# Generic target and build rules: objects from Fortran compilation
###$(BUILDDIR)/%_fortran.o : %.f *.inc
Expand All @@ -797,6 +815,7 @@ $(BUILDDIR)/%_fortran.o : %.f *.inc
# Target (and build rules): Fortran standalone executables
###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc

###$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSASAN)
ifeq ($(UNAME_S),Darwin)
$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375
endif
Expand All @@ -809,6 +828,7 @@ else
endif

ifneq ($(GPUCC),)
###$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSASAN)
ifneq ($(shell $(CXX) --version | grep ^Intel),)
$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy')
$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9')
Expand Down Expand Up @@ -911,10 +931,12 @@ endif
###endif

ifeq ($(GPUCC),) # link only runTest_cpp.o
###$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSASAN)
$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS)
$(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS)
else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o)
###$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSASAN)
$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS)
ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
Expand Down
63 changes: 34 additions & 29 deletions epochX/cudacpp/gg_tt.mad/SubProcesses/runTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,6 @@ struct CPUTest : public CUDA_CPU_TestBase
#ifdef MGONGPUCPP_GPUIMPL
struct CUDATest : public CUDA_CPU_TestBase
{
// Reset the device when our test goes out of scope. Note that this should happen after
// the frees, i.e. be declared before the pointers to device memory.
struct DeviceReset
{
~DeviceReset()
{
checkGpu( gpuDeviceReset() ); // this is needed by cuda-memcheck --leak-check full
}
} deviceResetter;

// Struct data members (process, and memory structures for random numbers, momenta, matrix elements and weights on host and device)
// [NB the hst/dev memory arrays must be initialised in the constructor, see issue #290]
CPPProcess process;
Expand Down Expand Up @@ -250,24 +240,39 @@ struct CUDATest : public CUDA_CPU_TestBase
};
#endif /* clang-format off */

// Use two levels of macros to force stringification at the right level
// (see https://gcc.gnu.org/onlinedocs/gcc-3.0.1/cpp_3.html#SEC17 and https://stackoverflow.com/a/3419392)
// Google macro is in https://github.com/google/googletest/blob/master/googletest/include/gtest/gtest-param-test.h
#define TESTID_CPU( s ) s##_CPU
#define XTESTID_CPU( s ) TESTID_CPU( s )
#define MG_INSTANTIATE_TEST_SUITE_CPU( prefix, test_suite_name ) \
INSTANTIATE_TEST_SUITE_P( prefix, \
test_suite_name, \
testing::Values( new CPUTest( MG_EPOCH_REFERENCE_FILE_NAME ) ) );
#define TESTID_GPU( s ) s##_GPU
#define XTESTID_GPU( s ) TESTID_GPU( s )
#define MG_INSTANTIATE_TEST_SUITE_GPU( prefix, test_suite_name ) \
INSTANTIATE_TEST_SUITE_P( prefix, \
test_suite_name, \
testing::Values( new CUDATest( MG_EPOCH_REFERENCE_FILE_NAME ) ) );

// AV July 2024 much simpler class structure without the presently-unnecessary googletest templates
// This is meant as a workaround to prevent the not-yet-understood segfault #907 that appears when adding a second test
#ifdef MGONGPUCPP_GPUIMPL
MG_INSTANTIATE_TEST_SUITE_GPU( XTESTID_GPU( MG_EPOCH_PROCESS_ID ), MadgraphTest );
// CUDA test 1
CUDATest cudaDriver1( MG_EPOCH_REFERENCE_FILE_NAME );
MadgraphTest mgTest1( cudaDriver1 );
#define TESTID1( s ) s##_GPU_MADGRAPH1
#define XTESTID1( s ) TESTID1( s )
// CUDA test 2
//CUDATest cudaDriver2( MG_EPOCH_REFERENCE_FILE_NAME );
//MadgraphTest mgTest2( cudaDriver2 );
//#define TESTID2( s ) s##_GPU_MADGRAPH2
//#define XTESTID2( s ) TESTID2( s )
#else
MG_INSTANTIATE_TEST_SUITE_CPU( XTESTID_CPU( MG_EPOCH_PROCESS_ID ), MadgraphTest );
#endif /* clang-format on */
// CPU test 1
CPUTest cppDriver1( MG_EPOCH_REFERENCE_FILE_NAME );
MadgraphTest mgTest1( cppDriver1 );
#define TESTID1( s ) s##_CPU_MADGRAPH1
#define XTESTID1( s ) TESTID1( s )
// CPU test 2
//CPUTest cppDriver2( MG_EPOCH_REFERENCE_FILE_NAME );
//MadgraphTest mgTest2( cppDriver2 );
//#define TESTID2( s ) s##_CPU_MADGRAPH2
//#define XTESTID2( s ) TESTID2( s )
#endif
// Instantiate Google test 1
TEST( XTESTID1( MG_EPOCH_PROCESS_ID ), compareMomAndME )
{
mgTest1.CompareMomentaAndME( *this );
}
// Instantiate Google test 2
//TEST( XTESTID2( MG_EPOCH_PROCESS_ID ), compareMomAndME )
//{
// mgTest2.CompareMomentaAndME( *this );
//}
/* clang-format on */
Loading

0 comments on commit d505178

Please sign in to comment.