Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge gpu branch onto main #21

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
379 changes: 215 additions & 164 deletions CMakeLists.txt

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions compile/power9/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
CAMP GPU : Instructions for [CTE-POWER](https://www.bsc.es/user-support/power.php) cluster
======

Run "./compile.libs.camp.sh" to compile CAMP from scratch

Run "./make.camp.power9.sh" to execute "TestMonarch.py" test

We recommend using the file "TestMonarch.py" to test the GPU branch. It exposes several configuration variables, such as the number of cells, the case, and the number of MPI processes. More information about the test can be found in "camp/test/monarch/".
8 changes: 8 additions & 0 deletions compile/power9/check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Rebuild CAMP in the existing build directory and run its CTest tests.
# Must be launched from compile/power9: every path below is relative to the
# current working directory.
set -e # abort on the first failing step (cd, make or ctest)

cd ../../build
make -j 4
ctest --output-on-failure
#./unit_test_aero_rep_single_particle

# Move to the MONARCH test directory; the GPU checks below are currently
# disabled.
cd ../test/monarch
#./checkGPU.sh
#python checkGPU.py
72 changes: 72 additions & 0 deletions compile/power9/compile.camp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
#
# Configure and build CAMP with MPI, GPU and NetCDF enabled in a fresh
# <repo>/build directory.
#
# Must be launched from compile/power9: dependency locations are derived from
# the current working directory plus relative paths.
#
# Environment:
#   BSC_MACHINE  "power" or "mn4" loads that machine's modules; any other value
#                (or unset) falls through to the local-workstation branch.

relative_path="../../../"
curr_path=$(pwd)

LOCAL_MACHINE=CGUZMAN
if [[ "${BSC_MACHINE:-}" == "power" ]]; then
  module load GCC/7.3.0-2.30
  module load OpenMPI/3.1.0-GCC-7.3.0-2.30
  module load JasPer/1.900.1-foss-2018b
  module load netCDF/4.6.1-foss-2018b
  module load netCDF-Fortran/4.4.4-foss-2018b
  module load ESMF/6.3.0rp1-foss-2018b
  module load CMake/3.15.3-GCCcore-7.3.0
  module load OpenBLAS/0.3.1-GCC-7.3.0-2.30
  module load CUDA/10.1.105-ES
  module load Python/3.7.0-foss-2018b
  module load matplotlib/3.1.1-foss-2018b-Python-3.7.0
  export NETCDF_FORTRAN_HOME="${EBROOTNETCDFMINFORTRAN}"
  export NETCDF_HOME="${EBROOTNETCDF}"
  export NETCDF_FORTRAN_LIB="/gpfs/projects/bsc32/software/rhel/7.5/ppc64le/POWER9/software/netCDF-Fortran/4.4.4-foss-2018b/lib/libnetcdff.so"
  export NETCDF_INCLUDE_DIR="/gpfs/projects/bsc32/software/rhel/7.5/ppc64le/POWER9/software/netCDF/4.6.1-foss-2018b/include"
  export JSON_FORTRAN_HOME="${curr_path}/${relative_path}/json-fortran-6.1.0/install/jsonfortran-gnu-6.1.0"
  mpifort=$(command -v mpifort)
elif [[ "${BSC_MACHINE:-}" == "mn4" ]]; then
  export JSON_FORTRAN_HOME="${curr_path}/${relative_path}/json-fortran-6.1.0/install/jsonfortran-intel-6.1.0"
  mpifort=$(command -v mpiifort)
  module load cmake
  module load gsl
  module load jasper/1.900.1
  module load netcdf/4.4.1.1
  module load hdf5/1.8.19
  module load libpng/1.5.13
elif [[ "${LOCAL_MACHINE}" == "CGUZMAN" ]]; then
  # Install MPI first so that the mpifort lookup below can succeed on a
  # machine that did not have it yet.
  if ! command -v mpicc &> /dev/null; then
    echo "MPI is not installed. Installing..."
    sudo apt update
    sudo apt install -y mpi-default-dev
  fi
  mpifort=$(command -v mpifort)
else
  echo "Unknown architecture" >&2
  exit 1
fi
export SUNDIALS_HOME="${curr_path}/${relative_path}/cvode-3.4-alpha/install"
export SUITE_SPARSE_HOME="${curr_path}/${relative_path}/SuiteSparse"

# Never run the destructive "rm -rf build" unless we really are in the repo root.
cd ../../ || exit 1
rm -rf build
mkdir build
cd build || exit 1

# NOTE(review): the "debug" build type is paired with -O3 debug flags, which
# looks like a deliberate way to get optimized code with symbols - confirm.
cmake -D CMAKE_C_COMPILER="$(command -v mpicc)" \
  -D CMAKE_BUILD_TYPE=debug \
  -D CMAKE_C_FLAGS_DEBUG="-g -O3" \
  -D CMAKE_Fortran_FLAGS_DEBUG="-g -O3" \
  -D CMAKE_C_FLAGS_RELEASE="-std=c99" \
  -D CMAKE_Fortran_FLAGS_RELEASE="" \
  -D CMAKE_Fortran_COMPILER="${mpifort}" \
  -D DISABLE_TESTS=ON \
  -D ENABLE_DEBUG=OFF \
  -D FAILURE_DETAIL=OFF \
  -D ENABLE_MPI=ON \
  -D ENABLE_GPU=ON \
  -D ENABLE_GSL:BOOL=FALSE \
  -D ENABLE_NETCDF=ON \
  .. || exit 1

# Link the MONARCH test's settings/ and out/ directories into the build dir.
ln -sf ../test/monarch/settings
ln -sf ../test/monarch/out
make -j 4 VERBOSE=1 || exit 1
cd "${curr_path}" || exit 1
35 changes: 35 additions & 0 deletions compile/power9/compile.cvode-3.4-alpha.power9.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env bash
#
# Configure, build and install CVODE 3.4-alpha (MPI + KLU, no CUDA) from a
# clean build/ and install/ tree inside <libs>/cvode-3.4-alpha.
#
# Must be launched from compile/power9 (paths are relative to the working
# directory).
#
# Environment:
#   SUITE_SPARSE_CAMP_ROOT  SuiteSparse location used for KLU; defaults to
#                           <libs>/SuiteSparse when unset or empty.

library_path="../../../"
curr_path=$(pwd)

if [[ -z "${SUITE_SPARSE_CAMP_ROOT:-}" ]]; then
  SUITE_SPARSE_CAMP_ROOT="${curr_path}/${library_path}/SuiteSparse"
fi

# Abort rather than delete build/ and install/ in whatever directory we
# happen to be in if the CVODE sources are missing.
cd "${library_path}/cvode-3.4-alpha" || exit 1
rm -rf build
mkdir build
rm -rf install
mkdir install
mkdir install/examples
cd build || exit 1
cmake -D CMAKE_BUILD_TYPE=debug \
  -D CMAKE_C_FLAGS_DEBUG="-O3" \
  -D MPI_ENABLE:BOOL=TRUE \
  -D KLU_ENABLE:BOOL=TRUE \
  -D CUDA_ENABLE:BOOL=FALSE \
  -D CMAKE_C_COMPILER="$(command -v mpicc)" \
  -D EXAMPLES_ENABLE_CUDA=OFF \
  -D KLU_LIBRARY_DIR="${SUITE_SPARSE_CAMP_ROOT}/lib" \
  -D KLU_INCLUDE_DIR="${SUITE_SPARSE_CAMP_ROOT}/include" \
  -D CMAKE_INSTALL_PREFIX="$(pwd)/../install" \
  -D EXAMPLES_ENABLE_C=OFF \
  .. || exit 1
#-D EXAMPLES_INSTALL_PATH=$(pwd)/../install/examples .. \
#-D CMAKE_CXX_FLAGS="-O3 -lcudart -lcublas" \
#-D CMAKE_C_FLAGS ="-O3 -lcudart -lcublas" \
#-D CMAKE_CUDA_FLAGS="-Xcompiler="-fpermissive" -lcudart -lcublas" \
#-D EXAMPLES_ENABLE_C=OFF \
make install || exit 1
cd "${curr_path}" || exit 1
3 changes: 3 additions & 0 deletions compile/power9/compile.cvode.camp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Runs the CVODE build script and then the CAMP build script, in that order.
# Both are invoked via ./ so this must be launched from the directory that
# contains them. The second script runs regardless of the first one's status.
./compile.cvode-3.4-alpha.power9.sh
./compile.camp.sh
12 changes: 12 additions & 0 deletions compile/power9/compile.json-fortran-6.1.0.power9.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# Configure, build and install json-fortran 6.1.0 into
# <libs>/json-fortran-6.1.0/install, using a fresh build/ directory.
# Must be launched from compile/power9 (paths are relative to the working
# directory).

library_path="../../../"
curr_path=$(pwd)

# Abort rather than delete a build/ directory somewhere else if the sources
# are missing.
cd "${library_path}/json-fortran-6.1.0" || exit 1
rm -rf build
mkdir build
# install/ is not wiped between runs, so tolerate an existing directory.
mkdir -p install
cd build || exit 1
cmake -D SKIP_DOC_GEN:BOOL=TRUE -D CMAKE_INSTALL_PREFIX="$(pwd)/../install" .. || exit 1
make install || exit 1
cd "${curr_path}" || exit 1
37 changes: 37 additions & 0 deletions compile/power9/compile.libs.camp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
#
# Build CAMP from scratch: json-fortran, SuiteSparse, CVODE and finally CAMP.
# Must be launched from compile/power9 because the per-library scripts are
# invoked via ./ .
#
# Environment:
#   BSC_MACHINE  optional. When set it must be "power" or "mn4" and the
#                matching modules are loaded; any other value aborts. When
#                unset no modules are loaded here.

if [[ -n "${BSC_MACHINE+x}" ]]; then
  if [[ "${BSC_MACHINE}" == "power" ]]; then
    #MONARCH P9 compilation
    module load GCC/7.3.0-2.30
    module load OpenMPI/3.1.0-GCC-7.3.0-2.30
    module load bsc/commands
    module load JasPer/1.900.1-foss-2018b
    module load netCDF/4.6.1-foss-2018b
    module load netCDF-Fortran/4.4.4-foss-2018b
    module load ESMF/6.3.0rp1-foss-2018b
    module load CMake/3.15.3-GCCcore-7.3.0
    module load OpenBLAS/0.3.1-GCC-7.3.0-2.30
    module load CUDA/10.1.105-ES
    export NETCDF_FORTRAN_HOME="${EBROOTNETCDFMINFORTRAN}"
    export NETCDF_HOME="${EBROOTNETCDF}"
    export NETCDF_FORTRAN_LIB="/gpfs/projects/bsc32/software/rhel/7.5/ppc64le/POWER9/software/netCDF-Fortran/4.4.4-foss-2018b/lib/libnetcdff.so"
    export NETCDF_INCLUDE_DIR="/gpfs/projects/bsc32/software/rhel/7.5/ppc64le/POWER9/software/netCDF/4.6.1-foss-2018b/include"
  elif [[ "${BSC_MACHINE}" == "mn4" ]]; then
    echo "mn4"
    module load gsl
    module load jasper/1.900.1
    module load netcdf/4.4.1.1
    module load hdf5/1.8.19
    module load libpng/1.5.13
  else
    echo "Unknown architecture" >&2
    exit 1
  fi
fi

# Each library depends on the ones before it, so stop at the first script
# that reports a failure.
./compile.json-fortran-6.1.0.power9.sh || exit 1
./compile.suiteSparse.power9.sh || exit 1
./compile.cvode-3.4-alpha.power9.sh || exit 1
./compile.camp.sh || exit 1

19 changes: 19 additions & 0 deletions compile/power9/compile.suiteSparse.power9.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
#
# Clean and rebuild SuiteSparse in <libs>/SuiteSparse, linking against the
# BLAS that matches the current machine.
# Must be launched from compile/power9 (paths are relative to the working
# directory).
#
# Environment:
#   BSC_MACHINE  "power" (OpenBLAS from EBROOTOPENBLAS) or "mn4" (Intel MKL
#                from INTEL_HOME); any other value (or unset) uses the
#                local-workstation OpenBLAS.

library_path="../../../"
curr_path=$(pwd)

# Do not run "make purge" in an unrelated directory if the sources are missing.
cd "${library_path}/SuiteSparse" || exit 1
make purge
LOCAL_MACHINE=CGUZMAN
if [[ "${BSC_MACHINE:-}" == "power" ]]; then
  make BLAS="-L${EBROOTOPENBLAS}/lib -lopenblas" LAPACK=""
elif [[ "${BSC_MACHINE:-}" == "mn4" ]]; then
  make BLAS="-L${INTEL_HOME}/mkl/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread -lpthread -lm" LAPACK=""
elif [[ "${LOCAL_MACHINE}" == "CGUZMAN" ]]; then
  make BLAS="-L/usr/lib/x86_64-linux-gnu -lopenblas" LAPACK=""
else
  echo "Unknown architecture" >&2
  exit 1
fi
# NOTE(review): this export only reaches the caller if the script is sourced,
# and the value is three levels above the SuiteSparse directory we are in now,
# not the SuiteSparse directory itself - confirm what was intended.
export SUITE_SPARSE_CAMP_ROOT="$(pwd)/${library_path}/"
cd "${curr_path}" || exit 1
24 changes: 24 additions & 0 deletions compile/power9/cuda_memcheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
#######################################
# Rebuild CAMP in the existing ../../build directory.
# Globals:
#   BSC_MACHINE (read)        "power" or "mn4"; anything else aborts
#   NUMPROC (read)            parallel make jobs, defaults to 4 when unset
#   SUNDIALS_HOME, SUITE_SPARSE_HOME, JSON_FORTRAN_HOME (exported)
#   curr_path (written)       directory the function was called from
# Returns:
#   0 on success; exits the shell with status 1 on an unknown architecture,
#   a missing build directory or a failed make.
#######################################
compile() {
  curr_path=$(pwd)
  export SUNDIALS_HOME="${curr_path}/../../../cvode-3.4-alpha/install"
  export SUITE_SPARSE_HOME="${curr_path}/../../../SuiteSparse"

  # The json-fortran install directory is named after the compiler family.
  if [[ "${BSC_MACHINE:-}" == "power" ]]; then
    export JSON_FORTRAN_HOME="${curr_path}/../../../json-fortran-6.1.0/install/jsonfortran-gnu-6.1.0"
  elif [[ "${BSC_MACHINE:-}" == "mn4" ]]; then
    export JSON_FORTRAN_HOME="${curr_path}/../../../json-fortran-6.1.0/install/jsonfortran-intel-6.1.0"
  else
    echo "Unknown architecture" >&2
    exit 1
  fi

  cd ../../build || exit 1
  # A bare "make -j" (NUMPROC unset) would spawn an unbounded number of jobs.
  if ! make -j "${NUMPROC:-4}"; then
    exit 1
  fi
  cd "${curr_path}" || exit 1
}
# Time the rebuild, then run mock_monarch under cuda-memcheck from the build
# directory, showing the combined stdout/stderr and saving it to
# compile/power9/a.txt.
time compile
# Without this guard a missing build directory would leave cuda-memcheck and
# the relative log path pointing at the wrong place.
cd ../../build || exit 1
cuda-memcheck mock_monarch 2>&1 | tee "../compile/power9/a.txt"
5 changes: 5 additions & 0 deletions compile/power9/cvode.camp.run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
# Rebuild CVODE and CAMP, then launch ./run.sh from the current directory.

# Provides compile_camp_cvode. Note that the sourced file enables `set -e`,
# which stays in effect for the rest of this script.
source make.cvode.camp.sh
compile_camp_cvode
./run.sh
4 changes: 4 additions & 0 deletions compile/power9/cvode.camp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Rebuild CVODE from scratch, then run ./make.camp.power9.sh.
# Both scripts are invoked via ./ so this must be launched from the directory
# that contains them.
set -e
# Stop here if the CVODE build script reports a failure.
./compile.cvode-3.4-alpha.power9.sh || exit 1
./make.camp.power9.sh
23 changes: 23 additions & 0 deletions compile/power9/debug.camp.diff.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
#
# Rebuild CAMP, run test/monarch/diff_TestMonarch.py twice (first as the file
# is currently configured, expected to be the "CPU One-cell" base case, then
# with the base case switched to "GPU BDF") and diff the two logs into
# compile/power9/diff.txt.
# Must be launched from compile/power9 (paths are relative to the working
# directory).
#
#todo: enable "USE_PRINT_ARRAYS"
#todo: enable "USE_BCG"
#todo: enable "CAMP_DEBUG_NVECTOR"
source make.camp.sh
make_camp
FILE=diff_TestMonarch.py
# Stop if the test directory is missing, so python and the in-place sed edits
# below cannot act on a same-named file somewhere else.
cd ../../test/monarch || exit 1
#log_path="/gpfs/scratch/bsc32/bsc32815/a591/nmmb-monarch/MODEL/SRC_LIBS/camp/compile/power9/log_gpu.txt"
log_path="../../compile/power9/log_cpu.txt"
#echo "Generating log file at " $log_path
python "${FILE}" > "${log_path}"
#python $FILE 2>&1 | tee $log_path
#cells=1
# Switch the test script to the GPU case in place...
sed -i 's/conf.caseBase = "CPU One-cell"/conf.caseBase = "GPU BDF"/g' "${FILE}"
#sed -i 's/conf.cells = \[1\]/conf.cells = \['"$cells"'\]/g' $FILE
log_path="../../compile/power9/log_gpu.txt"
#python $FILE 2>&1 | tee $log_path
python "${FILE}" > "${log_path}"
# ...and restore the CPU case so the file is left as it was found.
sed -i 's/conf.caseBase = "GPU BDF"/conf.caseBase = "CPU One-cell"/g' "${FILE}"
#sed -i 's/conf.cells = \['"$cells"'\]/conf.cells = \[1\]/g' $FILE
cd ../../compile/power9 || exit 1
diff log_cpu.txt log_gpu.txt 2>&1 | tee diff.txt
23 changes: 23 additions & 0 deletions compile/power9/debug.cvode.camp.diff.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
#
# Rebuild CVODE and CAMP, run test/monarch/diff_TestMonarch.py twice (first as
# the file is currently configured, expected to be the "CPU One-cell" base
# case, then with the base case switched to "GPU BDF") and diff the two logs
# into compile/power9/diff.txt.
# Must be launched from compile/power9 (paths are relative to the working
# directory).
#
#todo: enable "USE_PRINT_ARRAYS"
#todo: enable "USE_BCG"
#todo: enable "CAMP_DEBUG_NVECTOR"
source make.cvode.camp.sh
compile_camp_cvode
FILE=diff_TestMonarch.py
# Stop if the test directory is missing, so python and the in-place sed edits
# below cannot act on a same-named file somewhere else.
cd ../../test/monarch || exit 1
#log_path="/gpfs/scratch/bsc32/bsc32815/a591/nmmb-monarch/MODEL/SRC_LIBS/camp/compile/power9/log_gpu.txt"
log_path="../../compile/power9/log_cpu.txt"
#echo "Generating log file at " $log_path
python "${FILE}" > "${log_path}"
#python $FILE 2>&1 | tee $log_path
#cells=1
# Switch the test script to the GPU case in place...
sed -i 's/conf.caseBase = "CPU One-cell"/conf.caseBase = "GPU BDF"/g' "${FILE}"
#sed -i 's/conf.cells = \[1\]/conf.cells = \['"$cells"'\]/g' $FILE
log_path="../../compile/power9/log_gpu.txt"
#python $FILE 2>&1 | tee $log_path
python "${FILE}" > "${log_path}"
# ...and restore the CPU case so the file is left as it was found.
sed -i 's/conf.caseBase = "GPU BDF"/conf.caseBase = "CPU One-cell"/g' "${FILE}"
#sed -i 's/conf.cells = \['"$cells"'\]/conf.cells = \[1\]/g' $FILE
cd ../../compile/power9 || exit 1
diff log_cpu.txt log_gpu.txt 2>&1 | tee diff.txt
8 changes: 8 additions & 0 deletions compile/power9/make.camp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash

#######################################
# Run make in ../../build (relative to the caller's working directory) and
# return to the starting directory.
# Globals:
#   curr_path (written)  directory the function was called from
# Returns:
#   0 on success; exits the shell with status 1 if the build directory is
#   missing, make fails, or the starting directory cannot be restored.
#######################################
make_camp() {
  curr_path=$(pwd)
  # Without this guard a missing build directory would run make in the
  # caller's own directory.
  cd ../../build || exit 1
  make || exit 1
  # Quoted so that a checkout path containing spaces is restored correctly.
  cd "${curr_path}" || exit 1
}
19 changes: 19 additions & 0 deletions compile/power9/make.cvode.camp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Helper library defining compile_cvode and compile_camp_cvode; other scripts
# in this directory load it with `source`.
# NOTE: because the file is sourced, `set -e` below also puts the sourcing
# shell into exit-on-error mode.
set -e
# Brings in make_camp. The name has no slash, so bash resolves it via PATH and
# then the current directory - callers are presumably expected to run from
# compile/power9 (TODO confirm).
source make.camp.sh
#######################################
# Re-run "make install" in the existing CVODE build directory
# (../../../cvode-3.4-alpha/build relative to the caller's working directory)
# and return to the starting directory.
# Globals:
#   curr_path (written)               directory the function was called from
#   library_path (written)            relative path to the libraries root
#   SUITE_SPARSE_CAMP_ROOT (written)  defaulted to <libs>/SuiteSparse when
#                                     unset or empty
# Returns:
#   0 on success; exits the shell with status 1 if a directory is missing or
#   make fails.
#######################################
compile_cvode() {
  curr_path=$(pwd)
  library_path="../../../"
  if [[ -z "${SUITE_SPARSE_CAMP_ROOT:-}" ]]; then
    SUITE_SPARSE_CAMP_ROOT="${curr_path}/${library_path}/SuiteSparse"
  fi
  # Guard every step explicitly instead of relying on the caller having
  # `set -e` enabled.
  cd "${library_path}/cvode-3.4-alpha" || exit 1
  cd build || exit 1
  make install || exit 1
  # Quoted so that a checkout path containing spaces is restored correctly.
  cd "${curr_path}" || exit 1
}

# Rebuild and install CVODE (compile_cvode), then rebuild CAMP (make_camp),
# in that order.
compile_camp_cvode(){
compile_cvode
make_camp
}
6 changes: 6 additions & 0 deletions compile/power9/portability.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
**GPU Portability between different architectures**

Default configuration is for CTE-POWER architecture from Barcelona Supercomputing Center.
Running GPUs in a different architecture may produce an error in some functionalities.
The following issues have been detected on other architectures:
- Multi-GPU runs should follow the same architecture, in which each GPU is connected to two CPUs (in our case, 20 cores per CPU, 2 CPUs per node and 4 GPUs per node). The variables endDevice and startDevices should be modified accordingly to follow a different architecture.
4 changes: 4 additions & 0 deletions compile/power9/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Launch the MONARCH test's own run.sh from test/monarch.
# Must be launched from compile/power9 (the path is relative to the working
# directory).

# This script is itself called run.sh: if the cd failed and we carried on,
# "./run.sh" would re-execute this very file over and over.
cd ../../test/monarch || exit 1
./run.sh
12 changes: 12 additions & 0 deletions compile/power9/sbatch_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env bash
##SBATCH --qos=debug
#SBATCH --job-name=camp_test_monarch
#SBATCH --output=out_sbatch.txt
#SBATCH --error=err_sbatch.txt
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=160
#SBATCH --gres=gpu:4
#SBATCH --exclusive

# Batch wrapper: requests one exclusive node with 160 tasks and 4 GPUs (the
# qos line above is commented out) and then launches the MONARCH test's own
# sbatch_run.sh from test/monarch.
#
# This script is itself called sbatch_run.sh: if the cd failed and we carried
# on, "./sbatch_run.sh" would re-execute this very file over and over.
cd ../../test/monarch || exit 1
./sbatch_run.sh
4 changes: 4 additions & 0 deletions compile/ubuntu/compile_camp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Rebuild CAMP in the existing <repo>/build directory.
# Must be launched from compile/ubuntu (the path is relative to the working
# directory).

# Do not run make in the caller's directory if the build directory is missing.
cd ../../build || exit 1
make
23 changes: 23 additions & 0 deletions compile/ubuntu/compile_cvode.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
#
# Configure, build and install CVODE 3.4-alpha with CMake defaults.
#
# Arguments:
#   $1  optional; "from_camp_jobs" means the script is being run one directory
#       deeper, so the libraries root is four levels up instead of three.
# Environment:
#   SUITE_SPARSE_CAMP_ROOT  defaulted to <libs>/SuiteSparse when unset or empty.
#       NOTE(review): the value is neither exported nor passed to cmake in
#       this script - confirm whether it is still needed.

library_path="../../../"
if [[ "${1:-}" == "from_camp_jobs" ]]; then
  library_path="../../../../"
fi

if [[ -z "${SUITE_SPARSE_CAMP_ROOT:-}" ]]; then
  SUITE_SPARSE_CAMP_ROOT="$(pwd)/${library_path}/SuiteSparse"
fi

#tar -zxvf camp/cvode-3.4-alpha.tar.gz
# Do not create build/ and install/ in an unrelated directory if the CVODE
# sources are missing.
cd "${library_path}/cvode-3.4-alpha" || exit 1
#rm -r build
# The directories are no longer wiped first, so tolerate existing ones.
mkdir -p build
#rm -rf install
mkdir -p install
mkdir -p install/examples
cd build || exit 1
cmake ..
make install

#./cvode-3.4-alpha/build/examples/cvode/serial/cvRoberts_klu
7 changes: 6 additions & 1 deletion data/CAMP_v1_paper/binned/test_monarch_binned.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@ while [ true ]
do
echo Attempt $counter

exec_str="../../../camp_v1_paper_binned config_monarch_binned.json interface_monarch_binned.json out/monarch_cb05_soa"
if [ -z ${SLURM_TASK_PID+x} ]; then
exec_str="../../../camp_v1_paper_binned config_monarch_binned.json interface_monarch_binned.json out/monarch_cb05_soa"
else
exec_str="mpirun -v -np 1 --bind-to none ../../../camp_v1_paper_binned config_monarch_binned.json interface_monarch_binned.json out/monarch_cb05_soa"
fi


if ! $exec_str; then
echo Failure "$counter"
Expand Down
Loading