diff --git a/scripts/fv3.j b/scripts/fv3.j index 2818362..127dbbf 100755 --- a/scripts/fv3.j +++ b/scripts/fv3.j @@ -358,65 +358,58 @@ if ($N_OMP > 1) then setenv KMP_STACKSIZE 16m endif -####################################################################### -# Settings for Singularity - EXPERIMENTAL -####################################################################### - -# Detect if StandAlone_FV3_Dycore.x is in the current directory -# ------------------------------------------------------------- - -# If you are using singularity, set the path to the singularity sandbox here -setenv SINGULARITY_SANDBOX @SINGULARITY_SANDBOX - -# echo if we are running in singularity -if( $SINGULARITY_SANDBOX != "" ) then - echo "We are running under Singularity" - echo "" -endif - -# Detect if StandAlone_FV3_Dycore.x is in the experiment directory -if (-e $EXPDIR/StandAlone_FV3_Dycore.x) then - echo "Found StandAlone_FV3_Dycore.x in $EXPDIR" - - # If SINGULARITY_SANDBOX is non-empty and StandAlone_FV3_Dycore.x is found in the experiment directory, - # force the use of StandAlone_FV3_Dycore.x in the installation directory - if( $SINGULARITY_SANDBOX != "" ) then - echo "NOTE: Testing has shown Singularity only works when running with" - echo " the StandAlone_FV3_Dycore.x executable directly from the installation bin directory" - echo "" - echo " So, we will *ignore* the local StandAlone_FV3_Dycore.x and " - echo " instead use $GEOSBIN/StandAlone_FV3_Dycore.x" - echo "" - - setenv FV3EXE $GEOSBIN/StandAlone_FV3_Dycore.x - else - echo "Copying $EXPDIR/StandAlone_FV3_Dycore.x to $SCRDIR" - /bin/cp $EXPDIR/StandAlone_FV3_Dycore.x $SCRDIR/StandAlone_FV3_Dycore.x - - setenv FV3EXE $SCRDIR/StandAlone_FV3_Dycore.x - endif -else - echo "Using StandAlone_FV3_Dycore.x from $GEOSBIN" - - setenv FV3EXE $GEOSBIN/StandAlone_FV3_Dycore.x -endif -echo "" - -# If SINGULARITY_SANDBOX is non-empty, then run executable in singularity sandbox -if( $SINGULARITY_SANDBOX != "" ) then - # Load the 
Singularity module - module load singularity - - # Set Singularity Bind Paths. Note: These are dependent on where you are running. - # By default, we'll assume you are running this script from NOBACKUP - setenv REAL_BIND_PATH @REAL_BIND_PATH - setenv SINGULARITY_BIND_PATH "-B ${NOBACKUP}:${NOBACKUP},${REAL_BIND_PATH}:${REAL_BIND_PATH}" - - # Set a variable to encapsulate all Singularity details - setenv SINGULARITY_RUN "singularity exec $SINGULARITY_BIND_PATH $SINGULARITY_SANDBOX" -else - setenv SINGULARITY_RUN "" -endif +@SINGULARITY_BUILD ####################################################################### +@SINGULARITY_BUILD # Settings for Singularity - EXPERIMENTAL +@SINGULARITY_BUILD ####################################################################### +@SINGULARITY_BUILD +@SINGULARITY_BUILD # Configure the Singularity sandbox and select the executable to run +@SINGULARITY_BUILD # ------------------------------------------------------------- +@SINGULARITY_BUILD +@SINGULARITY_BUILD # If you are using Singularity, set the path to the Singularity sandbox here +@SINGULARITY_BUILD setenv SINGULARITY_SANDBOX @SINGULARITY_SANDBOX +@SINGULARITY_BUILD +@SINGULARITY_BUILD echo "We are running under Singularity" +@SINGULARITY_BUILD echo "" +@SINGULARITY_BUILD +@SINGULARITY_BUILD # Detect if StandAlone_FV3_Dycore.x is in the experiment directory +@SINGULARITY_BUILD if (-e $EXPDIR/StandAlone_FV3_Dycore.x) then +@SINGULARITY_BUILD echo "Found StandAlone_FV3_Dycore.x in $EXPDIR" +@SINGULARITY_BUILD +@SINGULARITY_BUILD echo "NOTE: Testing has shown Singularity only works when running with" +@SINGULARITY_BUILD echo " the StandAlone_FV3_Dycore.x executable directly from the installation bin directory" +@SINGULARITY_BUILD echo "" +@SINGULARITY_BUILD echo " So, we will *ignore* the local StandAlone_FV3_Dycore.x and " +@SINGULARITY_BUILD echo " instead use $GEOSBIN/StandAlone_FV3_Dycore.x" +@SINGULARITY_BUILD echo "" +@SINGULARITY_BUILD endif +@SINGULARITY_BUILD 
+@SINGULARITY_BUILD echo "Using StandAlone_FV3_Dycore.x from $GEOSBIN" +@SINGULARITY_BUILD setenv FV3EXE $GEOSBIN/StandAlone_FV3_Dycore.x +@SINGULARITY_BUILD +@SINGULARITY_BUILD # Load the Singularity module +@SINGULARITY_BUILD module load singularity +@SINGULARITY_BUILD +@SINGULARITY_BUILD # Set Singularity Bind Paths. Note: These are dependent on where you are running. +@SINGULARITY_BUILD # By default, we'll assume you are running this script from NOBACKUP +@SINGULARITY_BUILD setenv REAL_BIND_PATH @REAL_BIND_PATH +@SINGULARITY_BUILD setenv SINGULARITY_BIND_PATH "-B ${NOBACKUP}:${NOBACKUP},${REAL_BIND_PATH}:${REAL_BIND_PATH}" +@SINGULARITY_BUILD +@SINGULARITY_BUILD # Set a variable to encapsulate all Singularity details +@SINGULARITY_BUILD setenv SINGULARITY_RUN "singularity exec $SINGULARITY_BIND_PATH $SINGULARITY_SANDBOX" + +@NATIVE_BUILD # Detect if StandAlone_FV3_Dycore.x is in the experiment directory +@NATIVE_BUILD if (-e $EXPDIR/StandAlone_FV3_Dycore.x) then +@NATIVE_BUILD echo "Found StandAlone_FV3_Dycore.x in $EXPDIR" +@NATIVE_BUILD +@NATIVE_BUILD echo "Copying $EXPDIR/StandAlone_FV3_Dycore.x to $SCRDIR" +@NATIVE_BUILD /bin/cp $EXPDIR/StandAlone_FV3_Dycore.x $SCRDIR/StandAlone_FV3_Dycore.x +@NATIVE_BUILD +@NATIVE_BUILD setenv FV3EXE $SCRDIR/StandAlone_FV3_Dycore.x +@NATIVE_BUILD else +@NATIVE_BUILD echo "Using StandAlone_FV3_Dycore.x from $GEOSBIN" +@NATIVE_BUILD +@NATIVE_BUILD setenv FV3EXE $GEOSBIN/StandAlone_FV3_Dycore.x +@NATIVE_BUILD endif ####################################################################### # Run the Model @@ -432,7 +425,8 @@ else set IOSERVER_OPTIONS = "" endif -$RUN_CMD $NPES $SINGULARITY_RUN $FV3EXE $IOSERVER_OPTIONS --logging_config logging.yaml |& tee ${SCRDIR}.log +@SINGULARITY_BUILD $RUN_CMD $NPES $SINGULARITY_RUN $FV3EXE $IOSERVER_OPTIONS --logging_config logging.yaml |& tee ${SCRDIR}.log +@NATIVE_BUILD $RUN_CMD $NPES $FV3EXE $IOSERVER_OPTIONS --logging_config logging.yaml |& tee ${SCRDIR}.log if( $USE_SHMEM == 1 ) 
$GEOSBIN/RmShmKeys_sshmpi.csh >& /dev/null diff --git a/scripts/fv3_setup b/scripts/fv3_setup index a132487..6cf0679 100755 --- a/scripts/fv3_setup +++ b/scripts/fv3_setup @@ -34,6 +34,8 @@ endif # Set Current Working Path to fv3_setup # ------------------------------------- setenv ARCH `uname -s` +setenv NODE `uname -n` + if ($ARCH == Darwin) then set FINDPATH = realpath else @@ -52,6 +54,56 @@ if (! -x ${BINDIR}/StandAlone_FV3_Dycore.x) then exit 1 endif +####################################################################### +# Singularity Sandbox +####################################################################### + +# This section determines whether we are running in a Singularity +# sandbox by searching upward from $GEOSDEF for a .singularity.d +# directory. If one is found, we set the SINGULARITY_SANDBOX +# environment variable to the directory that contains it; otherwise +# we set it to an empty string. +# +# We also set the REAL_BIND_PATH environment variable. This is used +# to set the bind paths for Singularity runs. REAL_BIND_PATH is the +# physical path to the NOBACKUP directory (empty if no sandbox is found). + +setenv KEYFILE ".singularity.d" +setenv singstat 0 +setenv ORIGPATH `pwd -P` +cd $GEOSDEF +while ($singstat == 0) + if (-d $KEYFILE) then + setenv singstat 1 + setenv SINGPATH `pwd -P` + endif + if ($singstat == 0) then + if ("$cwd" == "/") then + break + else + cd .. 
+ endif + endif +end + +if ($singstat == 1) then + setenv SINGULARITY_SANDBOX $SINGPATH + setenv REAL_BIND_PATH `realpath $NOBACKUP` + # Template markers: the active build's marker is empty; the other is "#DELETE" + set USING_SINGULARITY = TRUE + set SINGULARITY_BUILD = "" + set NATIVE_BUILD = "#DELETE" +else + setenv SINGULARITY_SANDBOX "" + setenv REAL_BIND_PATH "" + + set USING_SINGULARITY = FALSE + set SINGULARITY_BUILD = "#DELETE" + set NATIVE_BUILD = "" +endif + +cd $ORIGPATH + # Set TMPDIR to /tmp due to issues with heredocs in Singularity sandboxes # ----------------------------------------------------------------------- @@ -61,7 +113,24 @@ setenv TMPDIR /tmp # Test for Command Line Flags ####################################################################### -set USER_SITE = "" +set SINGULARITY_RUN_SITE = "" +if ($singstat == 1) then + set USING_SINGULARITY = TRUE + + # If we are using Singularity, the site where the image was built + # (say AWS) might not match where we run. We can recognize NCCS and + # NAS nodes by hostname, so we use that to set the site. + + if (($NODE =~ discover*) || ($NODE =~ borg*) || ($NODE =~ warp*)) then + set SINGULARITY_RUN_SITE = "NCCS" + else if (($NODE =~ pfe*) || \ + ($NODE =~ r[0-9]*i[0-9]*n[0-9]*) || \ + ($NODE =~ r[0-9]*c[0-9]*t[0-9]*n[0-9]*)) then + set SINGULARITY_RUN_SITE = "NAS" + endif +else + set USING_SINGULARITY = FALSE +endif while ( $#argv > 0 ) set arg = $argv[1] shift argv @@ -71,23 +140,6 @@ while ( $#argv > 0 ) case --[Cc][Oo][Ll][Oo][Rr]: goto SETCOLOR - # Testing with singularity found we need the ability to override - # the SITE variable below because images might be built on, say, - # AWS and thus "block" us from running at NCCS. 
- # - # So, we make a --site flag that allows us to override the SITE - # variable - case -[Ss]: - case --[Ss][Ii][Tt][Ee]: - set USER_SITE = $1 - # if USER_SITE is empty, then we error out - if ( "$USER_SITE" == "" ) then - echo "ERROR: --site flag requires a site name" - exit 1 - endif - shift argv - breaksw - # Here any string not above will trigger USAGE case -[Hh]: case --[Hh][Ee][Ll][Pp]: @@ -100,12 +152,10 @@ end # Determine site ####################################################################### -setenv NODE `uname -n` -setenv ARCH `uname -s` -if ($USER_SITE == "") then +if ($SINGULARITY_RUN_SITE == "") then setenv SITE `awk '{print $2}' $ETCDIR/SITE.rc` else - setenv SITE $USER_SITE + setenv SITE $SINGULARITY_RUN_SITE endif ####################################################################### @@ -257,7 +307,6 @@ ASKPROC: if ( $SITE == 'NCCS' ) then echo "Enter the ${C1}Processor Type${CN} you wish to run on:" - echo " ${C2}hasw (Haswell)${CN}" echo " ${C2}sky (Skylake)${CN}" echo " ${C2}cas (Cascade Lake)${CN} (default)" echo " " @@ -267,13 +316,10 @@ if ( $SITE == 'NCCS' ) then set MODEL = 'cas' endif - if( $MODEL != 'hasw' & \ - $MODEL != 'sky' & \ + if( $MODEL != 'sky' & \ $MODEL != 'cas' ) goto ASKPROC - if ( $MODEL == 'hasw') then - set NCPUS_PER_NODE = 28 - else if ($MODEL == 'sky') then + if ($MODEL == 'sky') then set NCPUS_PER_NODE = 40 else if ($MODEL == 'cas') then # NCCS currently recommends that users do not run with @@ -608,38 +654,6 @@ echo $EXPDIRroot > $HOME/.EXPDIRroot set GEOSDIR = $GEOSDEF -###################################################################### -# Singularity Sandbox -####################################################################### - -setenv KEYFILE ".singularity.d" -setenv singstat 0 -setenv ORIGPATH `pwd -P` -cd $GEOSDIR -while ($singstat == 0) - if (-d $KEYFILE) then - setenv singstat 1 - setenv SINGPATH `pwd -P` - endif - if ($singstat == 0) then - if ("$cwd" == "/") then - break - else - cd .. 
- endif - endif -end -if ($singstat == 1) then - setenv SINGULARITY_SANDBOX $SINGPATH -else - setenv SINGULARITY_SANDBOX "" -endif - -cd $ORIGPATH - -setenv REAL_BIND_PATH `realpath $NOBACKUP` - - # # GEOSBIN does point to the bin/ directory in each # @@ -751,10 +765,19 @@ if ( $SITE == 'NCCS' ) then cat >> $EXPDIR/SETENV.commands << EOF setenv I_MPI_SHM_HEAP_VSIZE 512 setenv PSM2_MEMORY large +EOF + +# Testing at NCCS showed the settings below caused crashes at higher resolution +# under Singularity (the crash occurred during the restart read) +if ( $USING_SINGULARITY == FALSE ) then + +cat >> $EXPDIR/SETENV.commands << EOF setenv I_MPI_EXTRA_FILESYSTEM 1 setenv I_MPI_EXTRA_FILESYSTEM_FORCE gpfs EOF +endif # if NOT Singularity + endif # if NCCS endif # if mpi @@ -790,6 +813,8 @@ s?@GEOSSRC?$GEOSSRC?g s?@GEOSBIN?$GEOSBIN?g s?@GEOSETC?$GEOSETC?g s?@GEOSUTIL?$GEOSUTIL?g +s?@SINGULARITY_BUILD?$SINGULARITY_BUILD?g +s?@NATIVE_BUILD?$NATIVE_BUILD?g s?@SINGULARITY_SANDBOX?$SINGULARITY_SANDBOX?g s?@REAL_BIND_PATH?$REAL_BIND_PATH?g