From f190cbf7d44a1ae09c921651a3d701cb6b5a09d5 Mon Sep 17 00:00:00 2001 From: David Burrows Date: Fri, 4 Oct 2024 07:38:06 -0400 Subject: [PATCH 1/8] resource updates for C5 C768 GSI --- env/GAEA.env | 200 +++++++++++++++++++++++++- parm/config/gfs/config.resources.GAEA | 40 ++++++ 2 files changed, 234 insertions(+), 6 deletions(-) diff --git a/env/GAEA.env b/env/GAEA.env index 7736e0f1ea..375fada031 100755 --- a/env/GAEA.env +++ b/env/GAEA.env @@ -12,11 +12,24 @@ step=$1 export launcher="srun -l --export=ALL" export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" +#export POSTAMBLE_CMD='report-mem' + +# Configure MPI environment +#export I_MPI_ADJUST_ALLREDUCE=5 +#export MPI_BUFS_PER_PROC=2048 +#export MPI_BUFS_PER_HOST=2048 +#export MPI_GROUP_MAX=256 +#export MPI_MEMMAP_OFF=1 +#export MP_STDOUTMODE="ORDERED" export OMP_STACKSIZE=2048000 export NTHSTACK=1024000000 +#export LD_BIND_NOW=1 -ulimit -s unlimited -ulimit -a +# Setting stacksize to unlimited on login nodes is prohibited +if [[ -n "${SLURM_JOB_ID:-}" ]]; then + ulimit -s unlimited + ulimit -a +fi # Calculate common variables # Check first if the dependent variables are set @@ -26,21 +39,125 @@ if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:- NTHREADS1=${threads_per_task:-1} [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task} [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task} - # This may be useful when Gaea is fully ported, so ignore SC warning - # shellcheck disable=SC2034 APRUN_default="${launcher} -n ${ntasks}" else echo "ERROR config.resources must be sourced before sourcing GAEA.env" exit 2 fi -if [[ "${step}" = "prep" ]]; then +if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then export POE="NO" export BACK="NO" export sys_tp="GAEA" export launcher_PREP="srun" +elif [[ "${step}" = "prepsnowobs" ]]; then + + export APRUN_CALCFIMS="${APRUN_default}" + +elif [[ "${step}" = "prep_emissions" ]]; then + + export APRUN="${APRUN_default}" + +elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then + + export CFP_MP="YES" + if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi + export wavempexec=${launcher} + export wave_mpmd=${mpmd_opt} + +elif [[ "${step}" = "atmanlvar" ]]; then + + export NTHREADS_ATMANLVAR=${NTHREADSmax} + export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" + +elif [[ "${step}" = "atmensanlobs" ]]; then + + export NTHREADS_ATMENSANLOBS=${NTHREADSmax} + export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" + +elif [[ "${step}" = "atmensanlsol" ]]; then + + export NTHREADS_ATMENSANLSOL=${NTHREADSmax} + export APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" + +elif [[ "${step}" = "atmensanlletkf" ]]; then + + export NTHREADS_ATMENSANLLETKF=${NTHREADSmax} + export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" + +elif [[ "${step}" = "atmensanlfv3inc" ]]; then + + export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} + export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" + +elif [[ "${step}" = "aeroanlvar" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + + export NTHREADS_AEROANL=${NTHREADSmax} + export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" + +elif [[ "${step}" = "aeroanlgenb" ]]; then + + export NTHREADS_AEROANLGENB=${NTHREADSmax} + export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" + +elif [[ "${step}" = "atmanlfv3inc" ]]; then + + export NTHREADS_ATMANLFV3INC=${NTHREADSmax} + export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" + +elif [[ "${step}" = "prepobsaero" ]]; then + + export NTHREADS_PREPOBSAERO=${NTHREADS1} + export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" + +elif [[ "${step}" = "snowanl" ]]; then + + export NTHREADS_SNOWANL=${NTHREADSmax} + export APRUN_SNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_SNOWANL}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + +elif [[ "${step}" = "esnowrecen" ]]; then + + export NTHREADS_ESNOWRECEN=${NTHREADSmax} + export APRUN_ESNOWRECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ESNOWRECEN}" + + export APRUN_APPLY_INCR="${launcher} -n 6" + +elif [[ "${step}" = "marinebmat" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEBMAT="${APRUN_default}" + +elif [[ "${step}" = "marinebmat" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEBMAT="${APRUN_default}" + +elif [[ "${step}" = "marineanlvar" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEANLVAR="${APRUN_default}" + +elif [[ "${step}" = "marineanlchkpt" ]]; then + + export APRUNCFP="${launcher} -n \$ncmd --multi-prog" + export APRUN_MARINEANLCHKPT="${APRUN_default}" + +elif [[ "${step}" = "ocnanalecen" ]]; then + + export NTHREADS_OCNANALECEN=${NTHREADSmax} + export APRUN_OCNANALECEN="${APRUN_default} --cpus-per-task=${NTHREADS_OCNANALECEN}" + +elif [[ "${step}" = "marineanalletkf" ]]; then + + export NTHREADS_MARINEANALLETKF=${NTHREADSmax} + export APRUN_MARINEANALLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_MARINEANALLETKF}" + elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then export MKL_NUM_THREADS=4 @@ -71,7 +188,28 @@ elif [[ "${step}" = "sfcanl" ]]; then export NTHREADS_CYCLE=${threads_per_task:-14} export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" -elif [[ "${step}" = "fcst" ]]; then +elif [[ "${step}" = "eobs" ]]; then + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export NTHREADS_GSI=${NTHREADSmax} + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + +elif [[ "${step}" = "eupd" ]]; then + + export NTHREADS_ENKF=${NTHREADSmax} + export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}" + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + +elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then (( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node )) (( ufs_ntasks = nnodes*tasks_per_node )) @@ -93,6 +231,56 @@ elif [[ "${step}" = "oceanice_products" ]]; then export NTHREADS_OCNICEPOST=${NTHREADS1} export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" +elif [[ "${step}" = "ecen" ]]; then + + export NTHREADS_ECEN=${NTHREADSmax} + export APRUN_ECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ECEN}" + + export NTHREADS_CHGRES=${threads_per_task_chgres:-12} + [[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export NTHREADS_CHGRES=${max_tasks_per_node} + export APRUN_CHGRES="time" + + export NTHREADS_CALCINC=${threads_per_task_calcinc:-1} + [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task} + export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" + +elif [[ "${step}" = "esfc" ]]; then + + export NTHREADS_ESFC=${threads_per_task_esfc:-${max_threads_per_task}} + export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" + + export NTHREADS_CYCLE=${threads_per_task_cycle:-14} + [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node} + export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" + +elif [[ "${step}" = "epos" ]]; then + + export NTHREADS_EPOS=${NTHREADSmax} + export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" + +elif [[ "${step}" = "postsnd" ]]; then + + export CFP_MP="YES" + + export NTHREADS_POSTSND=${NTHREADS1} + export APRUN_POSTSND="${APRUN_default} --cpus-per-task=${NTHREADS_POSTSND}" + + export NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} + [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]] && export NTHREADS_POSTSNDCFP=${max_threads_per_task} + export APRUN_POSTSNDCFP="${launcher} -n ${ntasks_postsndcfp} ${mpmd_opt}" + +elif [[ "${step}" = "awips" ]]; then + + export NTHREADS_AWIPS=${NTHREADS1} + export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" + +elif [[ "${step}" = "gempak" ]]; then + + export CFP_MP="YES" + + export NTHREADS_GEMPAK=${NTHREADS1} + [[ ${NTHREADS_GEMPAK} -gt ${max_threads_per_task} ]] && export NTHREADS_GEMPAK=${max_threads_per_task} + elif [[ "${step}" = "fit2obs" ]]; then export NTHREADS_FIT2OBS=${NTHREADS1} diff --git a/parm/config/gfs/config.resources.GAEA b/parm/config/gfs/config.resources.GAEA index c50601da00..4266de2545 100644 --- a/parm/config/gfs/config.resources.GAEA +++ b/parm/config/gfs/config.resources.GAEA @@ -21,6 +21,46 @@ case ${step} in esac ;; + "eupd") + # update ntasks to 80 and threads_per_task to 20 + case ${CASE} in + "C768") + export ntasks=80 + export threads_per_task=20 + ;; + esac + export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) + ;; + + "analcalc") + # decrease tasks_per_node 127 to 64 + case ${CASE} in + "C768") + export tasks_per_node=64 + ;; + esac + ;; + + "upp") + # decrease tasks_per_node 120 to 60 + case ${CASE} in + "C768") + export tasks_per_node=60 + ;; + esac + ;; + + "fcst") + # increase WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_{GDAS,GFS} + case ${CASE} in + "C768") + export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=20 + export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS=25 + ;; + *) + ;; + esac + ;; *) ;; From daedec8429be2feab51a526188bfa83e31528927 Mon Sep 17 00:00:00 2001 From: David Burrows Date: Fri, 4 Oct 2024 09:10:53 -0400 Subject: [PATCH 2/8] adjustments to resource location --- env/GAEA.env | 161 +------------------------- parm/config/gfs/config.resources.GAEA | 6 + 2 files changed, 7 insertions(+), 160 deletions(-) diff --git a/env/GAEA.env b/env/GAEA.env index 375fada031..e4ec3bd364 100755 --- a/env/GAEA.env +++ b/env/GAEA.env @@ -12,18 +12,8 @@ step=$1 export launcher="srun -l --export=ALL" export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out" -#export POSTAMBLE_CMD='report-mem' - -# Configure MPI environment -#export I_MPI_ADJUST_ALLREDUCE=5 -#export MPI_BUFS_PER_PROC=2048 -#export MPI_BUFS_PER_HOST=2048 -#export MPI_GROUP_MAX=256 -#export MPI_MEMMAP_OFF=1 -#export MP_STDOUTMODE="ORDERED" export OMP_STACKSIZE=2048000 export NTHSTACK=1024000000 -#export LD_BIND_NOW=1 # Setting stacksize to unlimited on login nodes is prohibited if [[ -n "${SLURM_JOB_ID:-}" ]]; then @@ -45,118 +35,13 @@ else exit 2 fi -if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then +if [[ "${step}" = "prep" ]]; then export POE="NO" export BACK="NO" export sys_tp="GAEA" export launcher_PREP="srun" -elif [[ "${step}" = "prepsnowobs" ]]; then - - export APRUN_CALCFIMS="${APRUN_default}" - -elif [[ "${step}" = "prep_emissions" ]]; then - - export APRUN="${APRUN_default}" - -elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then - - export CFP_MP="YES" - if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi - export wavempexec=${launcher} - export wave_mpmd=${mpmd_opt} - -elif [[ "${step}" = "atmanlvar" ]]; then - - export NTHREADS_ATMANLVAR=${NTHREADSmax} - export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}" - -elif [[ "${step}" = "atmensanlobs" ]]; then - - export NTHREADS_ATMENSANLOBS=${NTHREADSmax} - export APRUN_ATMENSANLOBS="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLOBS}" - -elif [[ "${step}" = "atmensanlsol" ]]; then - - export NTHREADS_ATMENSANLSOL=${NTHREADSmax} - export APRUN_ATMENSANLSOL="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLSOL}" - -elif [[ "${step}" = "atmensanlletkf" ]]; then - - export NTHREADS_ATMENSANLLETKF=${NTHREADSmax} - export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}" - -elif [[ "${step}" = "atmensanlfv3inc" ]]; then - - export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax} - export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}" - -elif [[ "${step}" = "aeroanlvar" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - - export NTHREADS_AEROANL=${NTHREADSmax} - export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}" - -elif [[ "${step}" = "aeroanlgenb" ]]; then - - export NTHREADS_AEROANLGENB=${NTHREADSmax} - export APRUN_AEROANLGENB="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANLGENB}" - -elif [[ "${step}" = "atmanlfv3inc" ]]; then - - export NTHREADS_ATMANLFV3INC=${NTHREADSmax} - export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}" - -elif [[ "${step}" = "prepobsaero" ]]; then - - export NTHREADS_PREPOBSAERO=${NTHREADS1} - export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}" - -elif [[ "${step}" = "snowanl" ]]; then - - export NTHREADS_SNOWANL=${NTHREADSmax} - export APRUN_SNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_SNOWANL}" - - export APRUN_APPLY_INCR="${launcher} -n 6" - -elif [[ "${step}" = "esnowrecen" ]]; then - - export NTHREADS_ESNOWRECEN=${NTHREADSmax} - export APRUN_ESNOWRECEN="${APRUN_default} --cpus-per-task=${NTHREADS_ESNOWRECEN}" - - export APRUN_APPLY_INCR="${launcher} -n 6" - -elif [[ "${step}" = "marinebmat" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd --multi-prog" - export APRUN_MARINEBMAT="${APRUN_default}" - -elif [[ "${step}" = "marinebmat" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd --multi-prog" - export APRUN_MARINEBMAT="${APRUN_default}" - -elif [[ "${step}" = "marineanlvar" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd --multi-prog" - export APRUN_MARINEANLVAR="${APRUN_default}" - -elif [[ "${step}" = "marineanlchkpt" ]]; then - - export APRUNCFP="${launcher} -n \$ncmd --multi-prog" - export APRUN_MARINEANLCHKPT="${APRUN_default}" - -elif [[ "${step}" = "ocnanalecen" ]]; then - - export NTHREADS_OCNANALECEN=${NTHREADSmax} - export APRUN_OCNANALECEN="${APRUN_default} --cpus-per-task=${NTHREADS_OCNANALECEN}" - -elif [[ "${step}" = "marineanalletkf" ]]; then - - export NTHREADS_MARINEANALLETKF=${NTHREADSmax} - export APRUN_MARINEANALLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_MARINEANALLETKF}" elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then @@ -188,18 +73,6 @@ elif [[ "${step}" = "sfcanl" ]]; then export NTHREADS_CYCLE=${threads_per_task:-14} export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" -elif [[ "${step}" = "eobs" ]]; then - - export MKL_NUM_THREADS=4 - export MKL_CBWR=AUTO - - export NTHREADS_GSI=${NTHREADSmax} - export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" - - export CFP_MP=${CFP_MP:-"YES"} - export USE_CFP=${USE_CFP:-"YES"} - export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" - elif [[ "${step}" = "eupd" ]]; then export NTHREADS_ENKF=${NTHREADSmax} @@ -244,43 +117,11 @@ elif [[ "${step}" = "ecen" ]]; then [[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task} export APRUN_CALCINC="${APRUN_default} --cpus-per-task=${NTHREADS_CALCINC}" -elif [[ "${step}" = "esfc" ]]; then - - export NTHREADS_ESFC=${threads_per_task_esfc:-${max_threads_per_task}} - export APRUN_ESFC="${APRUN_default} --cpus-per-task=${NTHREADS_ESFC}" - - export NTHREADS_CYCLE=${threads_per_task_cycle:-14} - [[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node} - export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" - elif [[ "${step}" = "epos" ]]; then export NTHREADS_EPOS=${NTHREADSmax} export APRUN_EPOS="${APRUN_default} --cpus-per-task=${NTHREADS_EPOS}" -elif [[ "${step}" = "postsnd" ]]; then - - export CFP_MP="YES" - - export NTHREADS_POSTSND=${NTHREADS1} - export APRUN_POSTSND="${APRUN_default} --cpus-per-task=${NTHREADS_POSTSND}" - - export NTHREADS_POSTSNDCFP=${threads_per_task_postsndcfp:-1} - [[ ${NTHREADS_POSTSNDCFP} -gt ${max_threads_per_task} ]] && export NTHREADS_POSTSNDCFP=${max_threads_per_task} - export APRUN_POSTSNDCFP="${launcher} -n ${ntasks_postsndcfp} ${mpmd_opt}" - -elif [[ "${step}" = "awips" ]]; then - - export NTHREADS_AWIPS=${NTHREADS1} - export APRUN_AWIPSCFP="${APRUN_default} ${mpmd_opt}" - -elif [[ "${step}" = "gempak" ]]; then - - export CFP_MP="YES" - - export NTHREADS_GEMPAK=${NTHREADS1} - [[ ${NTHREADS_GEMPAK} -gt ${max_threads_per_task} ]] && export NTHREADS_GEMPAK=${max_threads_per_task} - elif [[ "${step}" = "fit2obs" ]]; then export NTHREADS_FIT2OBS=${NTHREADS1} diff --git a/parm/config/gfs/config.resources.GAEA b/parm/config/gfs/config.resources.GAEA index 4266de2545..2091d3b06c 100644 --- a/parm/config/gfs/config.resources.GAEA +++ b/parm/config/gfs/config.resources.GAEA @@ -54,8 +54,14 @@ case ${step} in # increase WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_{GDAS,GFS} case ${CASE} in "C768") + #export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=20 + #export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS=25 export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=20 export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS=25 + (( WRTTASK_PER_GROUP_PER_THREAD_GDAS = WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS * 6 )) + (( WRTTASK_PER_GROUP_PER_THREAD_GFS = WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS * 6 )) + export WRTTASK_PER_GROUP_PER_THREAD_GDAS + export WRTTASK_PER_GROUP_PER_THREAD_GFS ;; *) ;; From 2b7f661b5416924ca3749897b9df9856bb8be772 Mon Sep 17 00:00:00 2001 From: David Burrows Date: Mon, 7 Oct 2024 14:57:30 -0400 Subject: [PATCH 3/8] add some missing steps in GAEA.env --- env/GAEA.env | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/env/GAEA.env b/env/GAEA.env index e4ec3bd364..3a3dbd6ea3 100755 --- a/env/GAEA.env +++ b/env/GAEA.env @@ -27,6 +27,8 @@ if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:- max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) NTHREADSmax=${threads_per_task:-${max_threads_per_task}} NTHREADS1=${threads_per_task:-1} + # This may be useful when Gaea is fully ported, so ignore SC warning + # shellcheck disable=SC2034 [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task} [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task} APRUN_default="${launcher} -n ${ntasks}" @@ -42,7 +44,6 @@ if [[ "${step}" = "prep" ]]; then export sys_tp="GAEA" export launcher_PREP="srun" - elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then export MKL_NUM_THREADS=4 @@ -73,6 +74,18 @@ elif [[ "${step}" = "sfcanl" ]]; then export NTHREADS_CYCLE=${threads_per_task:-14} export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}" +elif [[ "${step}" = "eobs" ]]; then + + export MKL_NUM_THREADS=4 + export MKL_CBWR=AUTO + + export NTHREADS_GSI=${NTHREADSmax} + export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}" + + export CFP_MP=${CFP_MP:-"YES"} + export USE_CFP=${USE_CFP:-"YES"} + export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}" + elif [[ "${step}" = "eupd" ]]; then export NTHREADS_ENKF=${NTHREADSmax} From 116edd7dc9e4ee06095c19cad182e1264029aa12 Mon Sep 17 00:00:00 2001 From: David Burrows Date: Mon, 7 Oct 2024 15:05:06 -0400 Subject: [PATCH 4/8] env/GAEA.env clean up --- env/GAEA.env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/env/GAEA.env b/env/GAEA.env index 3a3dbd6ea3..90e7b553fa 100755 --- a/env/GAEA.env +++ b/env/GAEA.env @@ -27,10 +27,10 @@ if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:- max_threads_per_task=$((max_tasks_per_node / tasks_per_node)) NTHREADSmax=${threads_per_task:-${max_threads_per_task}} NTHREADS1=${threads_per_task:-1} - # This may be useful when Gaea is fully ported, so ignore SC warning - # shellcheck disable=SC2034 [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task} [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task} + # This may be useful when Gaea is fully ported, so ignore SC warning + # shellcheck disable=SC2034 APRUN_default="${launcher} -n ${ntasks}" else echo "ERROR config.resources must be sourced before sourcing GAEA.env" From 0a95b82ccb3d8545760ea855b1e737e5ffeed1fd Mon Sep 17 00:00:00 2001 From: David Burrows Date: Tue, 8 Oct 2024 10:38:12 -0400 Subject: [PATCH 5/8] REVERT: added machine specific resource statement to config.ufs --- parm/config/gfs/config.ufs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/config/gfs/config.ufs b/parm/config/gfs/config.ufs index 6309c4073b..dbc85e1c32 100644 --- a/parm/config/gfs/config.ufs +++ b/parm/config/gfs/config.ufs @@ -281,7 +281,7 @@ case "${fv3_res}" in export rf_cutoff=100.0 export fv_sg_adj=450 export WRITE_GROUP_GDAS=2 - export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=15 + export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=20 # DAB 15 export WRITE_GROUP_GFS=4 export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS=20 #Note this should be 10 for WCOSS2 fi From 5c45bc938ea24bd8cae9c25eba7013a426625f9a Mon Sep 17 00:00:00 2001 From: David Burrows Date: Tue, 8 Oct 2024 15:27:33 -0400 Subject: [PATCH 6/8] update config.resources.GAEA case statements with defaults --- parm/config/gfs/config.resources.GAEA | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/parm/config/gfs/config.resources.GAEA b/parm/config/gfs/config.resources.GAEA index 2091d3b06c..48a6b9f09c 100644 --- a/parm/config/gfs/config.resources.GAEA +++ b/parm/config/gfs/config.resources.GAEA @@ -4,15 +4,12 @@ case ${step} in "prep") - # Run on two nodes (requires ~400GB total) tasks_per_node=7 ;; "eobs") - # The number of tasks and cores used must be the same for eobs - # See https://github.com/NOAA-EMC/global-workflow/issues/2092 for details case ${CASE} in - "C768" | "C384") + "C768") export tasks_per_node=50 ;; *) @@ -28,6 +25,8 @@ case ${step} in export ntasks=80 export threads_per_task=20 ;; + *) + ;; esac export tasks_per_node=$(( max_tasks_per_node / threads_per_task )) ;; @@ -38,6 +37,8 @@ case ${step} in "C768") export tasks_per_node=64 ;; + *) + ;; esac ;; @@ -47,6 +48,8 @@ case ${step} in "C768") export tasks_per_node=60 ;; + *) + ;; esac ;; @@ -54,8 +57,6 @@ case ${step} in # increase WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_{GDAS,GFS} case ${CASE} in "C768") - #export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=20 - #export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS=25 export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=20 export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS=25 (( WRTTASK_PER_GROUP_PER_THREAD_GDAS = WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS * 6 )) @@ -64,9 +65,10 @@ case ${step} in export WRTTASK_PER_GROUP_PER_THREAD_GFS ;; *) - ;; + ;; esac ;; + *) ;; From 569e388665cf5ad74b29de5fc1211529f420a47a Mon Sep 17 00:00:00 2001 From: David Burrows Date: Wed, 9 Oct 2024 13:10:52 -0400 Subject: [PATCH 7/8] resource updates after chatting with Dave Huber --- parm/config/gfs/config.resources.GAEA | 8 ++++++++ parm/config/gfs/config.ufs | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/parm/config/gfs/config.resources.GAEA b/parm/config/gfs/config.resources.GAEA index 48a6b9f09c..36ffa725e0 100644 --- a/parm/config/gfs/config.resources.GAEA +++ b/parm/config/gfs/config.resources.GAEA @@ -63,6 +63,14 @@ case ${step} in (( WRTTASK_PER_GROUP_PER_THREAD_GFS = WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS * 6 )) export WRTTASK_PER_GROUP_PER_THREAD_GDAS export WRTTASK_PER_GROUP_PER_THREAD_GFS + (( ntasks_quilt_gdas = WRITE_GROUP_GDAS * WRTTASK_PER_GROUP_PER_THREAD_GDAS )) + (( ntasks_quilt_gfs = WRITE_GROUP_GFS * WRTTASK_PER_GROUP_PER_THREAD_GFS )) + export ntasks_quilt_gdas + export ntasks_quilt_gfs + if [[ "${gaea_sourced_resources:-false}" == false ]]; then + export gaea_sourced_resources=true + source "${EXPDIR}/config.resources" "${step}" + fi ;; *) ;; diff --git a/parm/config/gfs/config.ufs b/parm/config/gfs/config.ufs index dbc85e1c32..6309c4073b 100644 --- a/parm/config/gfs/config.ufs +++ b/parm/config/gfs/config.ufs @@ -281,7 +281,7 @@ case "${fv3_res}" in export rf_cutoff=100.0 export fv_sg_adj=450 export WRITE_GROUP_GDAS=2 - export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=20 # DAB 15 + export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GDAS=15 export WRITE_GROUP_GFS=4 export WRTTASK_PER_GROUP_PER_THREAD_PER_TILE_GFS=20 #Note this should be 10 for WCOSS2 fi From 7db0b48f4aa61c11a483893d532dc0e955a075b7 Mon Sep 17 00:00:00 2001 From: David Burrows Date: Thu, 10 Oct 2024 07:53:26 -0400 Subject: [PATCH 8/8] minor update to config.resources.GAEA --- parm/config/gfs/config.resources.GAEA | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parm/config/gfs/config.resources.GAEA b/parm/config/gfs/config.resources.GAEA index 36ffa725e0..79dd593c72 100644 --- a/parm/config/gfs/config.resources.GAEA +++ b/parm/config/gfs/config.resources.GAEA @@ -8,8 +8,10 @@ case ${step} in ;; "eobs") + # The number of tasks and cores used must be the same for eobs + # See https://github.com/NOAA-EMC/global-workflow/issues/2092 for details case ${CASE} in - "C768") + "C768" | "C384") export tasks_per_node=50 ;; *)