From c67bf19eea1230258b1935ad34522589c8b9ad79 Mon Sep 17 00:00:00 2001 From: Noel Merket Date: Fri, 10 Nov 2023 10:36:27 -0700 Subject: [PATCH] adding service account credentials --- buildstockbatch/hpc.py | 4 ++++ buildstockbatch/kestrel.sh | 1 + buildstockbatch/kestrel_postprocessing.sh | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/buildstockbatch/hpc.py b/buildstockbatch/hpc.py index 652bcf06..4427d06e 100644 --- a/buildstockbatch/hpc.py +++ b/buildstockbatch/hpc.py @@ -654,6 +654,10 @@ def queue_post_processing(self, after_jobids=[], upload_only=False, hipri=False) logger.debug("sbatch: {}".format(line)) def get_dask_client(self): + # Keep this, helpful for debugging on a bigmem node + # from dask.distributed import LocalCluster + # cluster = LocalCluster(local_directory="/tmp/scratch/dask", n_workers=90, memory_limit="16GiB") + # return Client(cluster) return Client(scheduler_file=os.path.join(self.output_dir, "dask_scheduler.json")) def process_results(self, *args, **kwargs): diff --git a/buildstockbatch/kestrel.sh b/buildstockbatch/kestrel.sh index d85858b0..5d4b9422 100644 --- a/buildstockbatch/kestrel.sh +++ b/buildstockbatch/kestrel.sh @@ -12,5 +12,6 @@ df -h module load python apptainer source "$MY_PYTHON_ENV/bin/activate" +source /kfs2/shared-projects/buildstock/aws_credentials.sh time python -u -m buildstockbatch.hpc kestrel "$PROJECTFILE" diff --git a/buildstockbatch/kestrel_postprocessing.sh b/buildstockbatch/kestrel_postprocessing.sh index d90962f9..6c86f5ef 100644 --- a/buildstockbatch/kestrel_postprocessing.sh +++ b/buildstockbatch/kestrel_postprocessing.sh @@ -11,6 +11,7 @@ df -h module load python apptainer source "$MY_PYTHON_ENV/bin/activate" +source /kfs2/shared-projects/buildstock/aws_credentials.sh export POSTPROCESS=1 @@ -29,6 +30,6 @@ pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "free -h" pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "df -i; df -h" $MY_PYTHON_ENV/bin/dask scheduler --scheduler-file $SCHEDULER_FILE &> $OUT_DIR/dask_scheduler.out & -pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "$MY_PYTHON_ENV/bin/dask worker --scheduler-file $SCHEDULER_FILE --local-directory /tmp/scratch/dask --nworkers ${NPROCS} --nthreads 1 --memory-limit ${MEMORY}MB" &> $OUT_DIR/dask_workers.out & +pdsh -w $SLURM_JOB_NODELIST_PACK_GROUP_1 "source /kfs2/shared-projects/buildstock/aws_credentials.sh; $MY_PYTHON_ENV/bin/dask worker --scheduler-file $SCHEDULER_FILE --local-directory /tmp/scratch/dask --nworkers ${NPROCS} --nthreads 1 --memory-limit ${MEMORY}MB" &> $OUT_DIR/dask_workers.out & time python -u -m buildstockbatch.hpc kestrel "$PROJECTFILE"