Skip to content

Commit

Permalink
Merge pull request #272 from GEOS-ESM/develop
Browse files Browse the repository at this point in the history
GitFlow: Merge Develop into Main
  • Loading branch information
mathomp4 authored Feb 21, 2024
2 parents 4ebb71e + 6930287 commit a0d924a
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 68 deletions.
6 changes: 3 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
version: 2.1

# Anchors in case we need to override the defaults from the orb
#baselibs_version: &baselibs_version v7.14.0
#bcs_version: &bcs_version v11.1.0
#baselibs_version: &baselibs_version v7.17.0
#bcs_version: &bcs_version v11.3.0


orbs:
ci: geos-esm/circleci-tools@1
ci: geos-esm/circleci-tools@2

workflows:
build-test:
Expand Down
159 changes: 94 additions & 65 deletions scripts/fv3_setup
Original file line number Diff line number Diff line change
Expand Up @@ -162,26 +162,8 @@ endif
# Test for Compiler and MPI Setup
#######################################################################

setenv BASEDIR `awk '{print $2}' $ETCDIR/BASEDIR.rc`

if ( `echo $BASEDIR | grep -i mvapich2` != '') then
set MPI = mvapich2
else if ( `echo $BASEDIR | grep -i mpich` != '') then
set MPI = mpich
else if ( `echo $BASEDIR | grep -i openmpi` != '') then
set MPI = openmpi
else if ( `echo $BASEDIR | grep -i hpcx` != '') then
set MPI = openmpi
else if ( `echo $BASEDIR | grep -i impi` != '') then
set MPI = intelmpi
else if ( `echo $BASEDIR | grep -i intelmpi` != '') then
set MPI = intelmpi
else if ( `echo $BASEDIR | grep -i mpt` != '') then
set MPI = mpt
else
# Assume default is Intel MPI in case of older baselibs
set MPI = intelmpi
endif
# Get MPI stack from CMake
set MPI_STACK = @MPI_STACK@

#######################################################################
# Enter Experiment Specific Run Parameters
Expand Down Expand Up @@ -310,7 +292,6 @@ if ( $SITE == 'NCCS' ) then
set BUILT_ON_SLES15 = @BUILT_ON_SLES15@

if ("$BUILT_ON_SLES15" == "TRUE") then
set DEFAULT_MODEL = 'mil'
echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
echo " ${C2}mil (Milan)${CN} (default)"
echo " "
Expand All @@ -329,7 +310,7 @@ if ( $SITE == 'NCCS' ) then
else
echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
echo " ${C2}sky (Skylake)${CN}"
echo " ${C2}cas (Cascade Lake)${CN} (default)"
echo " ${C2}cas (Cascade Lake) (default)${CN}"
echo " "
set MODEL = `echo $<`
set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
Expand Down Expand Up @@ -358,20 +339,17 @@ else if ( $SITE == 'NAS' ) then
echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
echo " ${C2}has (Haswell)${CN}"
echo " ${C2}bro (Broadwell)${CN}"
echo " ${C2}sky (Skylake)${CN}"
echo " ${C2}cas (Cascade Lake)${CN} (default)"
echo " ${C2}sky (Skylake)${CN} (default)"
echo " ${C2}cas (Cascade Lake)${CN}"
echo " ${C2}rom (AMD Rome)${CN}"
echo " "
echo " NOTE 1: Due to how FV3 is compiled by default, Sandy Bridge"
echo " and Ivy Bridge are not supported by current GEOS"
echo " "
echo " NOTE 2: GEOS is non-zero-diff when running on AMD Rome"
echo " compared to the other Intel nodes."
echo " NOTE Due to how FV3 is compiled by default, Sandy Bridge"
echo " and Ivy Bridge are not supported by current GEOS"
echo " "
set MODEL = `echo $<`
set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
if ( .$MODEL == .) then
set MODEL = 'cas'
set MODEL = 'sky'
endif

if( $MODEL != 'has' & \
Expand Down Expand Up @@ -715,60 +693,91 @@ echo $GROUP > $HOME/.GROUProot
# Set Recommended MPI Stack Settings
#######################################################################

# By default do not write restarts by oserver
set RESTART_BY_OSERVER = NO

/bin/rm -f $EXPDIR/SETENV.commands

if( $MPI == openmpi ) then
if( $MPI_STACK == openmpi ) then

# Open MPI and GEOS have issues with restart writing. Having the
# oserver write them can be orders of magnitude faster

set RESTART_BY_OSERVER = YES

# This turns off an annoying warning when running
# Open MPI on a system where TMPDIRs are on a networked
# file system
# Testing by Bill Putman determined some useful
# Open MPI parameters. Testing shows these work
# on both OSs at NCCS and on macOS

cat > $EXPDIR/SETENV.commands << EOF
setenv OMPI_MCA_shmem_mmap_enable_nfs_warning 0
# Turn off warning about TMPDIR on NFS
setenv OMPI_MCA_shmem_mmap_enable_nfs_warning 0
# pre-connect MPI procs on mpi_init
setenv OMPI_MCA_mpi_preconnect_all 1
setenv OMPI_MCA_coll_tuned_bcast_algorithm 7
setenv OMPI_MCA_coll_tuned_scatter_algorithm 2
setenv OMPI_MCA_coll_tuned_reduce_scatter_algorithm 3
setenv OMPI_MCA_coll_tuned_allreduce_algorithm 3
setenv OMPI_MCA_coll_tuned_allgather_algorithm 4
setenv OMPI_MCA_coll_tuned_allgatherv_algorithm 3
setenv OMPI_MCA_coll_tuned_gather_algorithm 1
setenv OMPI_MCA_coll_tuned_barrier_algorithm 0
# required for a tuned flag to be effective
setenv OMPI_MCA_coll_tuned_use_dynamic_rules 1
# disable file locks
setenv OMPI_MCA_sharedfp "^lockedfile,individual"
EOF

# The below settings seem to be recommended for hybrid
# systems using MVAPICH2 but could change
# systems using MVAPICH but could change

else if( $MPI == mvapich ) then
else if( $MPI_STACK == mvapich ) then

# MVAPICH and GEOS have issues with restart writing. Having the
# oserver write them seems to...work
set RESTART_BY_OSERVER = YES

cat > $EXPDIR/SETENV.commands << EOF
setenv MV2_ENABLE_AFFINITY 0
setenv SLURM_DISTRIBUTION block
setenv MV2_MPIRUN_TIMEOUT 100
setenv MV2_GATHERV_SSEND_THRESHOLD 256
setenv MV2_ENABLE_AFFINITY 0
setenv SLURM_DISTRIBUTION block
setenv MV2_MPIRUN_TIMEOUT 100
setenv MV2_GATHERV_SSEND_THRESHOLD 256
EOF

else if( $MPI == mpt ) then
else if( $MPI_STACK == mpt ) then

cat > $EXPDIR/SETENV.commands << EOF
setenv MPI_COLL_REPRODUCIBLE
setenv SLURM_DISTRIBUTION block
setenv MPI_COLL_REPRODUCIBLE
setenv SLURM_DISTRIBUTION block
#setenv MPI_DISPLAY_SETTINGS 1
#setenv MPI_VERBOSE 1
#setenv MPI_DISPLAY_SETTINGS 1
#setenv MPI_VERBOSE 1
setenv MPI_MEMMAP_OFF
unsetenv MPI_NUM_MEMORY_REGIONS
setenv MPI_XPMEM_ENABLED yes
unsetenv SUPPRESS_XPMEM_TRIM_THRESH
unsetenv MPI_MEMMAP_OFF
unsetenv MPI_NUM_MEMORY_REGIONS
setenv MPI_XPMEM_ENABLED yes
unsetenv SUPPRESS_XPMEM_TRIM_THRESH
setenv MPI_LAUNCH_TIMEOUT 40
setenv MPI_LAUNCH_TIMEOUT 40
setenv MPI_COMM_MAX 1024
setenv MPI_GROUP_MAX 1024
setenv MPI_BUFS_PER_PROC 256
# For some reason, PMI_RANK is randomly set and interferes
# with binarytile.x and other executables.
unsetenv PMI_RANK
# For some reason, PMI_RANK is randomly set and interferes
# with binarytile.x and other executables.
unsetenv PMI_RANK
# Often when debugging on MPT, the traceback from Intel Fortran
# is "absorbed" and only MPT's errors are displayed. To allow the
# compiler's traceback to be displayed, uncomment this environment
# variable
#setenv FOR_IGNORE_EXCEPTIONS false
# Often when debugging on MPT, the traceback from Intel Fortran
# is "absorbed" and only MPT's errors are displayed. To allow the
# compiler's traceback to be displayed, uncomment this environment
# variable
#setenv FOR_IGNORE_EXCEPTIONS false
EOF

else if( $MPI == intelmpi ) then
else if( $MPI_STACK == intelmpi ) then

cat > $EXPDIR/SETENV.commands << EOF
#setenv MPS_STAT_LEVEL 4
Expand Down Expand Up @@ -800,13 +809,32 @@ EOF

endif # if NOT Singularity

# Testing on SLES15 showed that the mlx provider did not seem
# to work at scale. So we move to use the verbs provider. Note:
# still seems to have issues at c720
# Testing by Bill Putman found these to be
# useful flags with Intel MPI on SLES15 on the
# Milan nodes.
# Note 1: Testing by NCCS shows the PSM3 provider
# runs on the Infiniband fabric. Tests show it runs
# up to C720.
# Note 2: When the Cascade Lakes are moved to
# SLES15, these will need to be Milan-only flags
# as Intel MPI will probably work just fine with
# Intel chips.
if ("$BUILT_ON_SLES15" == "TRUE") then
cat >> $EXPDIR/SETENV.commands << EOF
setenv I_MPI_OFI_PROVIDER verbs
setenv I_MPI_COLL_EXTERNAL 0
setenv I_MPI_FALLBACK 0
setenv I_MPI_FABRICS ofi
setenv I_MPI_OFI_PROVIDER psm3
setenv I_MPI_ADJUST_SCATTER 2
setenv I_MPI_ADJUST_SCATTERV 2
setenv I_MPI_ADJUST_GATHER 2
setenv I_MPI_ADJUST_GATHERV 3
setenv I_MPI_ADJUST_ALLGATHER 3
setenv I_MPI_ADJUST_ALLGATHERV 3
setenv I_MPI_ADJUST_ALLREDUCE 12
setenv I_MPI_ADJUST_REDUCE 10
setenv I_MPI_ADJUST_BCAST 11
setenv I_MPI_ADJUST_REDUCE_SCATTER 4
setenv I_MPI_ADJUST_BARRIER 9
EOF

endif # if SLES15
Expand All @@ -815,6 +843,7 @@ endif # if NCCS

endif # if mpi


#######################################################################
# Create Local Scripts and Resource Files
#######################################################################
Expand Down

0 comments on commit a0d924a

Please sign in to comment.