Update configuration files for slurm executor plugin profiles #83

Merged: 2 commits, Jun 19, 2024

Changes from all commits
70 changes: 16 additions & 54 deletions config/slurm/README.md
@@ -1,58 +1,15 @@
-# GenErode execution on SLURM clusters
+# GenErode execution on Dardel (PDC/KTH)

-With the switch to Snakemake version 8, GenErode can be run
-as follows on SLURM clusters:
-
-1) Create the GenErode conda environment or update an earlier
-version. The latest conda environment contains the Snakemake
-executor plugin for slurm:
-
-```
-conda env create -f environment.yaml -n generode
-```
-
-2) Copy one of the example configuration files `config/slurm/profile/config_plugin_rackham.yaml`
-or `config/slurm/profile/config_plugin_dardel.yaml` to
-`slurm/config.yaml`. This file specifies compute resources
-for each rule or group job. Any rule or group job that is
-not listed under `set-threads` or `set-resources` uses
-default resources specified under `default-resources`. If
-any rule or group job fails due to too little memory or run
-time, its compute resources can be updated in this file.
-
-> Note that the current configuration files were adjusted to the
-HPC clusters Rackham from UPPMAX and Dardel from PDC/KTH. Details
-on how to configure and run GenErode on Dardel are provided below.
-The configuration file for Snakemake version 7, which was also
-written for Rackham/UPPMAX, was kept for comparison.
-
-3) Start GenErode as follows:
-
-- Open a tmux or screen session
-- Activate the GenErode conda environment
-- Start the dry run:
-
-```
-snakemake --profile slurm -np &> YYMMDD_dry.out
-```
-
-- Start the main run:
-
-```
-snakemake --profile slurm &> YYMMDD_main.out
-```
-
-> Useful flags for running the pipeline: `--ri` to re-run
-incomplete jobs and `-k` to keep going in case a job fails.
-
-## Specific instructions for Dardel
-
-1) Load the following modules on Dardel:
+1) Load the following modules:

```
module load PDC UPPMAX bioinfo-tools conda singularity tmux
```

> Note that tmux is only available as a module on Dardel
but the equivalent tool screen is pre-installed and does
not need to be loaded.

2) After cloning the repository, change permissions for the
Snakefile:

@@ -73,12 +30,17 @@ to `slurm/config.yaml`. This file specifies compute resources
for each rule or group job to be run on Dardel. Any rule or
group job that is not listed under `set-threads` or `set-resources`
uses default resources specified under `default-resources`. If
-any rule or group jobs fail due to too little memory or run
+any rule or group job fails due to too little memory or run
time, its compute resources can be updated in this file.

-> Note that the current version of `config/slurm/profile/config_plugin_dardel.yaml`
-is still being tested. Threads are currently specified under
-`set-threads` and under `set-resources` as `cpus_per_task`.
+> Note that compute resources are specified in three places in
+the configuration file: 1) under `set-threads` (the number of
+threads Snakemake uses for the rule), 2) under `set-resources` as
+`mem_mb`, the memory in megabytes (the number of threads multiplied
+by the available memory per thread), and 3) under `set-resources` as
+`cpus_per_task` (the same number as specified under `set-threads`,
+required for correct memory assignment on Dardel).
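
For illustration, a minimal sketch of how a single rule ends up specified in all three places, using the `fastqc_historical_raw` values from this PR (the `set-threads` entry is an assumption, inferred from `cpus_per_task`):

```
set-threads:
  fastqc_historical_raw: 16   # threads Snakemake assigns to the rule
set-resources:
  fastqc_historical_raw:
    mem_mb: 16000             # 16 threads x 1000 MB per thread
    cpus_per_task: 16         # must match the set-threads value
```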

5) Start GenErode as follows:

@@ -96,7 +58,7 @@ conda activate generode
- Start the dry run:

```
-snakemake --profile slurm -np &> YYMMDD_dry.out
+snakemake --profile slurm -n &> YYMMDD_dry.out
```

- Start the main run:
4 changes: 2 additions & 2 deletions config/slurm/profile/config_plugin_dardel.yaml
@@ -112,8 +112,10 @@ set-resources:
    cpus_per_task: 16
  fastqc_historical_raw:
    mem_mb: 16000
+    cpus_per_task: 16
  fastqc_modern_raw:
    mem_mb: 16000
+    cpus_per_task: 16
  fastp_historical:
    runtime: 600
    mem_mb: 32000
@@ -225,9 +227,7 @@ set-resources:
    cpus_per_task: 32
  sort_vcfs:
    runtime: 1440
-  sort_vcfs:
    mem_mb: 16000
-  sort_vcfs:
    cpus_per_task: 16
  sorted_bcf2vcf:
    runtime: 300
2 changes: 2 additions & 0 deletions config/slurm/profile/config_plugin_rackham.yaml
@@ -106,8 +106,10 @@ set-resources:
    cpus_per_task: 2
  fastqc_historical_raw:
    mem_mb: 12800
+    cpus_per_task: 2
  fastqc_modern_raw:
    mem_mb: 12800
+    cpus_per_task: 2
  fastp_historical:
    runtime: 600
    mem_mb: 32000
71 changes: 71 additions & 0 deletions utilities/mutational_load_snpeff/slurm/profile/README.md
@@ -0,0 +1,71 @@
# GenErode execution on Dardel

1) Load the following modules:

```
module load PDC UPPMAX bioinfo-tools conda singularity tmux
```

> Note that tmux is only available as a module on Dardel
but the equivalent tool screen is pre-installed and does
not need to be loaded.
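
For example, a session could be opened with either tool (the session name `generode` is only an illustration):

```
tmux new -s generode
# or, with the pre-installed alternative:
screen -S generode
```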

2) After cloning the repository, change permissions for the
Snakefile:

```
chmod 755 Snakefile
```

3) Create the GenErode conda environment or update an earlier
version. The latest conda environment contains the Snakemake
executor plugin for slurm:

```
conda env create -f environment.yaml -n generode
```
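
To update an existing environment instead of creating a new one, a sketch (same environment name assumed):

```
conda env update -f environment.yaml -n generode
```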

4) Copy the configuration file `config/slurm/profile/config_plugin_dardel.yaml`
to `slurm/config.yaml`. This file specifies compute resources
for each rule or group job to be run on Dardel. Any rule or
group job that is not listed under `set-threads` or `set-resources`
uses default resources specified under `default-resources`. If
any rule or group job fails due to too little memory or run
time, its compute resources can be updated in this file.

> Note that compute resources are specified in three places in
the configuration file: 1) under `set-threads` (the number of
threads Snakemake uses for the rule), 2) under `set-resources` as
`mem_mb`, the memory in megabytes (the number of threads multiplied
by the available memory per thread), and 3) under `set-resources` as
`cpus_per_task` (the same number as specified under `set-threads`,
required for correct memory assignment on Dardel).
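
For illustration, the `extract_number_of_samples` rule from the profile below is specified consistently in all three places:

```
set-threads:
  extract_number_of_samples: 16   # threads Snakemake assigns to the rule
set-resources:
  extract_number_of_samples:
    mem_mb: 16000                 # 16 threads x 1000 MB per thread
    cpus_per_task: 16             # same value as under set-threads
```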

5) Start GenErode as follows:

- Open a tmux session (alternatively, you can use screen)

- Activate the GenErode conda environment (created or updated
from `environment.yaml`), replacing the path below with the
location of the conda environment:

```
export CONDA_ENVS_PATH=/cfs/klemming/home/.../
conda activate generode
```

- Start the dry run:

```
snakemake --profile slurm -n &> YYMMDD_dry.out
```

- Start the main run:

```
snakemake --profile slurm &> YYMMDD_main.out
```

> Useful flags for running the pipeline: `--ri` to re-run
incomplete jobs and `-k` to keep going in case a job fails.
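
Combined, a restart of the main run that re-runs incomplete jobs and keeps going on failures could look like this (`--ri` is short for `--rerun-incomplete`, `-k` for `--keep-going`):

```
snakemake --profile slurm --ri -k &> YYMMDD_main.out
```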
53 changes: 0 additions & 53 deletions utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml

This file was deleted.

@@ -0,0 +1,74 @@
# Configuration file for slurm plugin (Snakemake >8.0.0) for Dardel cluster at PDC/KTH
# snakemake CLI flags
executor: slurm
jobs: 100
printshellcmds: true
software-deployment-method: apptainer

# slurm resources
## default-resources: applied to all jobs, overruled by resources defined below for jobs
default-resources:
  slurm_account: XXX-XX-XXX # update this to your slurm account
  slurm_partition: shared # use Dardel’s shared partition
  runtime: 120 # default runtime in minutes
  mem_mb: 8000
  nodes: 1 # one node on Dardel from the shared partition
  ntasks: 1 # number of concurrent tasks / ranks
  cpus_per_task: 8 # number of hyperthreads per task; each corresponds to 1 GB RAM

## map rule names to threads
set-threads:
  extract_number_of_samples: 16
  find_fixed_homozygote_alt_sites: 32
  remove_fixed_homozygote_alt_sites_merged_vcf: 32
  find_intron_intergenic_variants: 16
  remove_sites_snpEff_vcf: 32
  extract_high_impact_snps: 16
  extract_moderate_impact_snps: 16
  extract_low_impact_snps: 16
  extract_synonymous_variant_snps: 16
  total_load: 8
  realised_load: 8

## set-resources: map rule names to resources in general
set-resources:
  extract_number_of_samples:
    mem_mb: 16000
    runtime: 30
    cpus_per_task: 16
  find_fixed_homozygote_alt_sites:
    mem_mb: 32000
    runtime: 300
    cpus_per_task: 32
  remove_fixed_homozygote_alt_sites_merged_vcf:
    mem_mb: 32000
    runtime: 300
    cpus_per_task: 32
  find_intron_intergenic_variants:
    mem_mb: 16000
    runtime: 300
    cpus_per_task: 16
  remove_sites_snpEff_vcf:
    mem_mb: 32000
    runtime: 300
    cpus_per_task: 32
  extract_high_impact_snps:
    mem_mb: 16000
    cpus_per_task: 16
  extract_moderate_impact_snps:
    mem_mb: 16000
    cpus_per_task: 16
  extract_low_impact_snps:
    mem_mb: 16000
    cpus_per_task: 16
  extract_synonymous_variant_snps:
    mem_mb: 16000
    cpus_per_task: 16
  total_load:
    mem_mb: 8000
    runtime: 30
    cpus_per_task: 8
  realised_load:
    mem_mb: 8000
    runtime: 30
    cpus_per_task: 8
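
Saved as `slurm/config.yaml` in the working directory, this profile is picked up by the `--profile slurm` flag used in the README above, e.g. for a dry run:

```
snakemake --profile slurm -n &> YYMMDD_dry.out
```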