Skip to content

Commit

Permalink
Merge pull request #247 from GEOS-ESM/feature/amumssen/singularity-de…
Browse files Browse the repository at this point in the history
…lete

Updated to clean up scripts for NOT singularity
  • Loading branch information
mathomp4 authored Aug 10, 2023
2 parents dde1c2f + c62c307 commit 941ae8e
Show file tree
Hide file tree
Showing 2 changed files with 139 additions and 120 deletions.
114 changes: 54 additions & 60 deletions scripts/fv3.j
Original file line number Diff line number Diff line change
Expand Up @@ -358,65 +358,58 @@ if ($N_OMP > 1) then
setenv KMP_STACKSIZE 16m
endif

#######################################################################
# Settings for Singularity - EXPERIMENTAL
#######################################################################

# Detect if StandAlone_FV3_Dycore.x is in the current directory
# -------------------------------------------------------------

# If you are using singularity, set the path to the singularity sandbox here
setenv SINGULARITY_SANDBOX @SINGULARITY_SANDBOX

# echo if we are running in singularity
if( $SINGULARITY_SANDBOX != "" ) then
echo "We are running under Singularity"
echo ""
endif

# Detect if StandAlone_FV3_Dycore.x is in the experiment directory
if (-e $EXPDIR/StandAlone_FV3_Dycore.x) then
echo "Found StandAlone_FV3_Dycore.x in $EXPDIR"

# If SINGULARITY_SANDBOX is non-empty and StandAlone_FV3_Dycore.x is found in the experiment directory,
# force the use of StandAlone_FV3_Dycore.x in the installation directory
if( $SINGULARITY_SANDBOX != "" ) then
echo "NOTE: Testing has shown Singularity only works when running with"
echo " the StandAlone_FV3_Dycore.x executable directly from the installation bin directory"
echo ""
echo " So, we will *ignore* the local StandAlone_FV3_Dycore.x and "
echo " instead use $GEOSBIN/StandAlone_FV3_Dycore.x"
echo ""

setenv FV3EXE $GEOSBIN/StandAlone_FV3_Dycore.x
else
echo "Copying $EXPDIR/StandAlone_FV3_Dycore.x to $SCRDIR"
/bin/cp $EXPDIR/StandAlone_FV3_Dycore.x $SCRDIR/StandAlone_FV3_Dycore.x

setenv FV3EXE $SCRDIR/StandAlone_FV3_Dycore.x
endif
else
echo "Using StandAlone_FV3_Dycore.x from $GEOSBIN"

setenv FV3EXE $GEOSBIN/StandAlone_FV3_Dycore.x
endif
echo ""

# If SINGULARITY_SANDBOX is non-empty, then run executable in singularity sandbox
if( $SINGULARITY_SANDBOX != "" ) then
# Load the Singularity module
module load singularity

# Set Singularity Bind Paths. Note: These are dependent on where you are running.
# By default, we'll assume you are running this script from NOBACKUP
setenv REAL_BIND_PATH @REAL_BIND_PATH
setenv SINGULARITY_BIND_PATH "-B ${NOBACKUP}:${NOBACKUP},${REAL_BIND_PATH}:${REAL_BIND_PATH}"

# Set a variable to encapsulate all Singularity details
setenv SINGULARITY_RUN "singularity exec $SINGULARITY_BIND_PATH $SINGULARITY_SANDBOX"
else
setenv SINGULARITY_RUN ""
endif
@SINGULARITY_BUILD #######################################################################
@SINGULARITY_BUILD # Settings for Singularity - EXPERIMENTAL
@SINGULARITY_BUILD #######################################################################
@SINGULARITY_BUILD
@SINGULARITY_BUILD # Select the executable and build the Singularity launch command
@SINGULARITY_BUILD # ---------------------------------------------------------------
@SINGULARITY_BUILD
@SINGULARITY_BUILD # Path to the Singularity sandbox. The value is filled in by fv3_setup
@SINGULARITY_BUILD # when this template is processed.
@SINGULARITY_BUILD setenv SINGULARITY_SANDBOX @SINGULARITY_SANDBOX
@SINGULARITY_BUILD
@SINGULARITY_BUILD echo "We are running under Singularity"
@SINGULARITY_BUILD echo ""
@SINGULARITY_BUILD
@SINGULARITY_BUILD # Detect if StandAlone_FV3_Dycore.x is in the experiment directory.
@SINGULARITY_BUILD # This check only prints a notice: a local copy is never used here, and
@SINGULARITY_BUILD # FV3EXE is always set to the executable in $GEOSBIN below.
@SINGULARITY_BUILD if (-e $EXPDIR/StandAlone_FV3_Dycore.x) then
@SINGULARITY_BUILD echo "Found StandAlone_FV3_Dycore.x in $EXPDIR"
@SINGULARITY_BUILD
@SINGULARITY_BUILD echo "NOTE: Testing has shown Singularity only works when running with"
@SINGULARITY_BUILD echo " the StandAlone_FV3_Dycore.x executable directly from the installation bin directory"
@SINGULARITY_BUILD echo ""
@SINGULARITY_BUILD echo " So, we will *ignore* the local StandAlone_FV3_Dycore.x and "
@SINGULARITY_BUILD echo " instead use $GEOSBIN/StandAlone_FV3_Dycore.x"
@SINGULARITY_BUILD echo ""
@SINGULARITY_BUILD endif
@SINGULARITY_BUILD
@SINGULARITY_BUILD echo "Using StandAlone_FV3_Dycore.x from $GEOSBIN"
@SINGULARITY_BUILD setenv FV3EXE $GEOSBIN/StandAlone_FV3_Dycore.x
@SINGULARITY_BUILD
@SINGULARITY_BUILD # Load the Singularity module
@SINGULARITY_BUILD module load singularity
@SINGULARITY_BUILD
@SINGULARITY_BUILD # Set Singularity Bind Paths. Note: These are dependent on where you are running.
@SINGULARITY_BUILD # By default, we'll assume you are running this script from NOBACKUP.
@SINGULARITY_BUILD # Both $NOBACKUP and the real path computed for it by fv3_setup are bound
@SINGULARITY_BUILD # at the same location inside the container.
@SINGULARITY_BUILD setenv REAL_BIND_PATH @REAL_BIND_PATH
@SINGULARITY_BUILD setenv SINGULARITY_BIND_PATH "-B ${NOBACKUP}:${NOBACKUP},${REAL_BIND_PATH}:${REAL_BIND_PATH}"
@SINGULARITY_BUILD
@SINGULARITY_BUILD # Set a variable to encapsulate all Singularity details. It is placed
@SINGULARITY_BUILD # between "$RUN_CMD $NPES" and "$FV3EXE" on the run line below.
@SINGULARITY_BUILD setenv SINGULARITY_RUN "singularity exec $SINGULARITY_BIND_PATH $SINGULARITY_SANDBOX"

@NATIVE_BUILD # Native (non-Singularity) run: choose which executable to launch.
@NATIVE_BUILD # A StandAlone_FV3_Dycore.x found in $EXPDIR is copied to $SCRDIR and run
@NATIVE_BUILD # from there; otherwise the executable in $GEOSBIN is used.
@NATIVE_BUILD #
@NATIVE_BUILD # Detect if StandAlone_FV3_Dycore.x is in the experiment directory
@NATIVE_BUILD if (-e $EXPDIR/StandAlone_FV3_Dycore.x) then
@NATIVE_BUILD echo "Found StandAlone_FV3_Dycore.x in $EXPDIR"
@NATIVE_BUILD
@NATIVE_BUILD echo "Copying $EXPDIR/StandAlone_FV3_Dycore.x to $SCRDIR"
@NATIVE_BUILD /bin/cp $EXPDIR/StandAlone_FV3_Dycore.x $SCRDIR/StandAlone_FV3_Dycore.x
@NATIVE_BUILD
@NATIVE_BUILD setenv FV3EXE $SCRDIR/StandAlone_FV3_Dycore.x
@NATIVE_BUILD else
@NATIVE_BUILD echo "Using StandAlone_FV3_Dycore.x from $GEOSBIN"
@NATIVE_BUILD
@NATIVE_BUILD setenv FV3EXE $GEOSBIN/StandAlone_FV3_Dycore.x
@NATIVE_BUILD endif

#######################################################################
# Run the Model
Expand All @@ -432,7 +425,8 @@ else
set IOSERVER_OPTIONS = ""
endif

$RUN_CMD $NPES $SINGULARITY_RUN $FV3EXE $IOSERVER_OPTIONS --logging_config logging.yaml |& tee ${SCRDIR}.log
@SINGULARITY_BUILD # Launch the executable through "singularity exec" (see SINGULARITY_RUN above).
@SINGULARITY_BUILD # Standard output and standard error are both piped to tee and saved in ${SCRDIR}.log.
@SINGULARITY_BUILD $RUN_CMD $NPES $SINGULARITY_RUN $FV3EXE $IOSERVER_OPTIONS --logging_config logging.yaml |& tee ${SCRDIR}.log
@NATIVE_BUILD # Launch the executable directly.
@NATIVE_BUILD # Standard output and standard error are both piped to tee and saved in ${SCRDIR}.log.
@NATIVE_BUILD $RUN_CMD $NPES $FV3EXE $IOSERVER_OPTIONS --logging_config logging.yaml |& tee ${SCRDIR}.log

if( $USE_SHMEM == 1 ) $GEOSBIN/RmShmKeys_sshmpi.csh >& /dev/null

Expand Down
145 changes: 85 additions & 60 deletions scripts/fv3_setup
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ endif
# Set Current Working Path to fv3_setup
# -------------------------------------
setenv ARCH `uname -s`
setenv NODE `uname -n`

if ($ARCH == Darwin) then
set FINDPATH = realpath
else
Expand All @@ -52,6 +54,56 @@ if (! -x ${BINDIR}/StandAlone_FV3_Dycore.x) then
exit 1
endif

#######################################################################
# Singularity Sandbox
#######################################################################

# This section is to determine if we are running in a Singularity
# sandbox by looking for the .singularity.d directory. If we are,
# then we set the SINGULARITY_SANDBOX environment variable to the
# path to the sandbox. If we are not, then we set it to an empty
# string.
#
# We also set the REAL_BIND_PATH environment variable. This is used
# to set the bind paths for Singularity runs. REAL_BIND_PATH is the
# physical path to the NOBACKUP directory (empty when no sandbox is
# found).
#
# Finally, we set the SINGULARITY_BUILD and NATIVE_BUILD template
# switches: the one matching the detected mode is empty and the other
# is "#DELETE". They are substituted for the markers of the same name
# that prefix lines in the job script template.
# NOTE(review): lines left prefixed with "#DELETE" are presumably
# stripped from the generated script afterwards - confirm.

setenv KEYFILE ".singularity.d"
setenv singstat 0
# Remember where we started so we can return after the search
setenv ORIGPATH `pwd -P`

# Search $GEOSDEF and then each of its parent directories for $KEYFILE.
# The loop ends on the first match or when / has been checked.
cd $GEOSDEF
while ($singstat == 0)
if (-d $KEYFILE) then
setenv singstat 1
# The directory that contains $KEYFILE is taken as the sandbox root
setenv SINGPATH `pwd -P`
endif
if ($singstat == 0) then
if ("$cwd" == "/") then
break
else
cd ..
endif
endif
end

if ($singstat == 1) then
setenv SINGULARITY_SANDBOX $SINGPATH
setenv REAL_BIND_PATH `realpath $NOBACKUP`

set USING_SINGULARITY = TRUE
set SINGULARITY_BUILD = ""
set NATIVE_BUILD = "#DELETE"
else
setenv SINGULARITY_SANDBOX ""
setenv REAL_BIND_PATH ""

set USING_SINGULARITY = FALSE
set SINGULARITY_BUILD = "#DELETE"
set NATIVE_BUILD = ""
endif

# Return to the directory we were in before the search
cd $ORIGPATH

# Set TMPDIR to /tmp due to issues with heredocs in Singularity sandboxes
# -----------------------------------------------------------------------

Expand All @@ -61,7 +113,24 @@ setenv TMPDIR /tmp
# Test for Command Line Flags
#######################################################################

set USER_SITE = ""
# Site to use when running from a Singularity sandbox. An empty value
# means "not detected"; the site is then read from $ETCDIR/SITE.rc in
# the "Determine site" section below.
set SINGULARITY_RUN_SITE = ""

# USING_SINGULARITY has already been set from this same $singstat test
# in the "Singularity Sandbox" section above, so it is not assigned
# again here.
if ($singstat == 1) then

   # If we are using Singularity, then where the image was built (say
   # AWS) might not match where we run. We at least know NCCS and NAS,
   # so we can use the node name to set the site.

   if (($NODE =~ discover*) || ($NODE =~ borg*) || ($NODE =~ warp*)) then
      set SINGULARITY_RUN_SITE = "NCCS"
   else if (($NODE =~ pfe*) || \
            ($NODE =~ r[0-9]*i[0-9]*n[0-9]*) || \
            ($NODE =~ r[0-9]*c[0-9]*t[0-9]*n[0-9]*)) then
      set SINGULARITY_RUN_SITE = "NAS"
   endif
endif
while ( $#argv > 0 )
set arg = $argv[1]
shift argv
Expand All @@ -71,23 +140,6 @@ while ( $#argv > 0 )
case --[Cc][Oo][Ll][Oo][Rr]:
goto SETCOLOR

# Testing with singularity found we need the ability to override
# the SITE variable below because images might be built on, say,
# AWS and thus "block" us from running at NCCS.
#
# So, we make a --site flag that allows us to override the SITE
# variable
case -[Ss]:
case --[Ss][Ii][Tt][Ee]:
set USER_SITE = $1
# if USER_SITE is empty, then we error out
if ( "$USER_SITE" == "" ) then
echo "ERROR: --site flag requires a site name"
exit 1
endif
shift argv
breaksw

# Here any string not above will trigger USAGE
case -[Hh]:
case --[Hh][Ee][Ll][Pp]:
Expand All @@ -100,12 +152,10 @@ end
# Determine site
#######################################################################

setenv NODE `uname -n`
setenv ARCH `uname -s`
if ($USER_SITE == "") then
if ($SINGULARITY_RUN_SITE == "") then
setenv SITE `awk '{print $2}' $ETCDIR/SITE.rc`
else
setenv SITE $USER_SITE
setenv SITE $SINGULARITY_RUN_SITE
endif

#######################################################################
Expand Down Expand Up @@ -257,7 +307,6 @@ ASKPROC:

if ( $SITE == 'NCCS' ) then
echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
echo " ${C2}hasw (Haswell)${CN}"
echo " ${C2}sky (Skylake)${CN}"
echo " ${C2}cas (Cascade Lake)${CN} (default)"
echo " "
Expand All @@ -267,13 +316,10 @@ if ( $SITE == 'NCCS' ) then
set MODEL = 'cas'
endif

if( $MODEL != 'hasw' & \
$MODEL != 'sky' & \
if( $MODEL != 'sky' & \
$MODEL != 'cas' ) goto ASKPROC

if ( $MODEL == 'hasw') then
set NCPUS_PER_NODE = 28
else if ($MODEL == 'sky') then
if ($MODEL == 'sky') then
set NCPUS_PER_NODE = 40
else if ($MODEL == 'cas') then
# NCCS currently recommends that users do not run with
Expand Down Expand Up @@ -608,38 +654,6 @@ echo $EXPDIRroot > $HOME/.EXPDIRroot

set GEOSDIR = $GEOSDEF

######################################################################
# Singularity Sandbox
#######################################################################

setenv KEYFILE ".singularity.d"
setenv singstat 0
setenv ORIGPATH `pwd -P`
cd $GEOSDIR
while ($singstat == 0)
if (-d $KEYFILE) then
setenv singstat 1
setenv SINGPATH `pwd -P`
endif
if ($singstat == 0) then
if ("$cwd" == "/") then
break
else
cd ..
endif
endif
end
if ($singstat == 1) then
setenv SINGULARITY_SANDBOX $SINGPATH
else
setenv SINGULARITY_SANDBOX ""
endif

cd $ORIGPATH

setenv REAL_BIND_PATH `realpath $NOBACKUP`


#
# GEOSBIN does point to the bin/ directory in each
#
Expand Down Expand Up @@ -751,10 +765,19 @@ if ( $SITE == 'NCCS' ) then
cat >> $EXPDIR/SETENV.commands << EOF
setenv I_MPI_SHM_HEAP_VSIZE 512
setenv PSM2_MEMORY large
EOF

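# Testing at NCCS showed that the following two settings caused crashes
# at higher resolutions (the crash occurred during the restart read),
# so they are only set when not using Singularity.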
if ( $USING_SINGULARITY == FALSE ) then

cat >> $EXPDIR/SETENV.commands << EOF
setenv I_MPI_EXTRA_FILESYSTEM 1
setenv I_MPI_EXTRA_FILESYSTEM_FORCE gpfs
EOF

endif # if NOT Singularity

endif # if NCCS

endif # if mpi
Expand Down Expand Up @@ -790,6 +813,8 @@ s?@GEOSSRC?$GEOSSRC?g
s?@GEOSBIN?$GEOSBIN?g
s?@GEOSETC?$GEOSETC?g
s?@GEOSUTIL?$GEOSUTIL?g
s?@SINGULARITY_BUILD?$SINGULARITY_BUILD?g
s?@NATIVE_BUILD?$NATIVE_BUILD?g
s?@SINGULARITY_SANDBOX?$SINGULARITY_SANDBOX?g
s?@REAL_BIND_PATH?$REAL_BIND_PATH?g
Expand Down

0 comments on commit 941ae8e

Please sign in to comment.