From a803dc460a0fb07459b25c0516f4fe054763041d Mon Sep 17 00:00:00 2001
From: verku <verena.kutschera@scilifelab.se>
Date: Wed, 19 Jun 2024 17:07:47 +0200
Subject: [PATCH 1/2] Update configuration files for slurm executor plugin
 profiles

---
 config/slurm/README.md                        |  28 +++-
 .../slurm/profile/config_plugin_dardel.yaml   |   4 +-
 .../slurm/profile/config_plugin_rackham.yaml  |   2 +
 .../slurm/profile/README.md                   | 122 ++++++++++++++++++
 .../slurm/profile/config_plugin.yaml          |  53 --------
 .../slurm/profile/config_plugin_dardel.yaml   |  74 +++++++++++
 6 files changed, 222 insertions(+), 61 deletions(-)
 create mode 100644 utilities/mutational_load_snpeff/slurm/profile/README.md
 delete mode 100644 utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml
 create mode 100644 utilities/mutational_load_snpeff/slurm/profile/config_plugin_dardel.yaml

diff --git a/config/slurm/README.md b/config/slurm/README.md
index 697d7f7..04845f2 100644
--- a/config/slurm/README.md
+++ b/config/slurm/README.md
@@ -23,7 +23,14 @@ time, their compute resources can be updated in this file.
 > Note that the current configuration files were adjusted to the 
 HPC clusters Rackham from UPPMAX and Dardel from PDC/KTH. Details 
 on how to configure and run GenErode on Dardel are provided below. 
-The configuration file for Snakemake version 7 was kept for comparison 
+Memory requirements are specified three times in these configuration 
+files: 1) under `set-threads` (used by Snakemake to specify threads 
+in rules), 2) under `set-resources` and therein under `mem_mb`, 
+specifying the memory in Megabytes (multiplying the number of threads 
+with the available memory per thread), and 3) under `set-resources` 
+and therein under `cpus-per-task` (the same number as specified under 
+`set-threads`, required for correct memory assignment on Dardel). The 
+configuration file for Snakemake version 7 was kept for comparison, 
 which was also written for Rackham/UPPMAX. 
 
 3) Start GenErode the following:
@@ -53,6 +60,10 @@ incomplete jobs and `-k` to keep going in case a job fails.
 module load PDC UPPMAX bioinfo-tools conda singularity tmux
 ```
 
+> Note that on Dardel, tmux is only available as a module, 
+whereas the equivalent tool `screen` is pre-installed and 
+does not need to be loaded. 
+
 2) After cloning the repository, change permissions for the 
 Snakefile:
 
@@ -73,12 +84,17 @@ to `slurm/config.yaml`. This file specifies compute resources
 for each rule or group jobs to be run on Dardel. Any rule or 
 group job that is not listed under `set-threads` or `set-resources` 
 uses default resources specified under `default-resources`. If 
-any rule or group jobs fail due to too little memory or run 
+any rule or group job fails due to too little memory or run 
 time, their compute resources can be updated in this file. 
 
-> Note that the current version of `config/slurm/profile/config_plugin_dardel.yaml` 
-is still being tested. Threads are currently specified under 
-`set-threads` and under `set-resources` as `cpus_per_task`.
+> Note that compute resources are specified in three places in 
+the configuration file: 1) under `set-threads` (used by Snakemake 
+to specify threads in rules), 2) under `set-resources` and therein 
+under `mem_mb`, specifying the memory in Megabytes (multiplying 
+the number of threads with the available memory per thread), 
+and 3) under `set-resources` and therein under `cpus_per_task` 
+(the same number as specified under `set-threads`, required for 
+correct memory assignment on Dardel). 
 
 5) Start GenErode the following:
 
@@ -96,7 +112,7 @@ conda activate generode
 - Start the dry run:
 
 ```
-snakemake --profile slurm -np &> YYMMDD_dry.out
+snakemake --profile slurm -n &> YYMMDD_dry.out
 ```
 
 - Start the main run:
diff --git a/config/slurm/profile/config_plugin_dardel.yaml b/config/slurm/profile/config_plugin_dardel.yaml
index 7732b36..50504df 100644
--- a/config/slurm/profile/config_plugin_dardel.yaml
+++ b/config/slurm/profile/config_plugin_dardel.yaml
@@ -112,8 +112,10 @@ set-resources:
     cpus_per_task: 16
   fastqc_historical_raw:
     mem_mb: 16000
+    cpus_per_task: 16
   fastqc_modern_raw:
     mem_mb: 16000
+    cpus_per_task: 16
   fastp_historical:
     runtime: 600
     mem_mb: 32000
@@ -225,9 +227,7 @@ set-resources:
     cpus_per_task: 32
   sort_vcfs:
     runtime: 1440
-  sort_vcfs:
     mem_mb: 16000
-  sort_vcfs:
     cpus_per_task: 16
   sorted_bcf2vcf:
     runtime: 300
diff --git a/config/slurm/profile/config_plugin_rackham.yaml b/config/slurm/profile/config_plugin_rackham.yaml
index 0d931d1..2fae685 100644
--- a/config/slurm/profile/config_plugin_rackham.yaml
+++ b/config/slurm/profile/config_plugin_rackham.yaml
@@ -106,8 +106,10 @@ set-resources:
     cpus_per_task: 2
   fastqc_historical_raw:
     mem_mb: 12800
+    cpus_per_task: 2
   fastqc_modern_raw:
     mem_mb: 12800
+    cpus_per_task: 2
   fastp_historical:
     runtime: 600
     mem_mb: 32000
diff --git a/utilities/mutational_load_snpeff/slurm/profile/README.md b/utilities/mutational_load_snpeff/slurm/profile/README.md
new file mode 100644
index 0000000..bfcdfc5
--- /dev/null
+++ b/utilities/mutational_load_snpeff/slurm/profile/README.md
@@ -0,0 +1,122 @@
+# GenErode execution on SLURM clusters
+
+With the switch to Snakemake version 8, GenErode can be run 
+the following on SLURM clusters:
+
+1) Create the GenErode conda environment or update an earlier 
+version. The latest conda environment contains the Snakemake 
+executor plugin for slurm:
+
+```
+conda create -f environment.yaml -n generode
+```
+
+2) Copy the example configuration file `slurm/profile/config_plugin_dardel.yaml` 
+to `slurm/config.yaml`. This file specifies compute resources 
+for each rule or group jobs. Any rule or group job that is 
+not listed under `set-threads` or `set-resources` uses 
+default resources specified under `default-resources`. If 
+any rule or group jobs fail due to too little memory or run 
+time, their compute resources can be updated in this file. 
+
+> Note that the current configuration file was adjusted to the 
+HPC cluster Dardel from PDC/KTH. Details on how to configure and 
+run GenErode on Dardel are provided below. Memory requirements are 
+specified three times in the configuration file: 1) under 
+`set-threads` (used by Snakemake to specify threads in rules), 2) 
+under `set-resources` and therein under `mem_mb`, specifying the 
+memory in Megabytes (multiplying the number of threads with the 
+available memory per thread), and 3) under `set-resources` and 
+therein under `cpus-per-task` (the same number as specified under 
+`set-threads`, required for correct memory assignment on Dardel). 
+
+3) Start GenErode the following:
+
+- Open a tmux or screen session
+- Activate the GenErode conda environment
+- Start the dry run:
+
+```
+snakemake --profile slurm -n &> YYMMDD_dry.out
+```
+
+- Start the main run:
+
+```
+snakemake --profile slurm &> YYMMDD_main.out
+```
+
+> Useful flags for running the pipeline: `--ri` to re-run 
+incomplete jobs and `-k` to keep going in case a job fails. 
+
+## Specific instructions for Dardel
+
+1) Load the following modules on Dardel:
+
+```
+module load PDC UPPMAX bioinfo-tools conda singularity tmux
+```
+
+> Note that on Dardel, tmux is only available as a module, 
+whereas the equivalent tool `screen` is pre-installed and 
+does not need to be loaded. 
+
+2) After cloning the repository, change permissions for the 
+Snakefile:
+
+```
+chmod 755 Snakefile
+```
+
+3) Create the GenErode conda environment or update an earlier 
+version. The latest conda environment contains the Snakemake 
+executor plugin for slurm:
+
+```
+conda env create -f environment.yaml -n generode
+```
+
+4) Copy the configuration file `slurm/profile/config_plugin_dardel.yaml` 
+to `slurm/config.yaml`. This file specifies compute resources 
+for each rule or group job to be run on Dardel. Any rule or 
+group job that is not listed under `set-threads` or `set-resources` 
+uses default resources specified under `default-resources`. If 
+any rule or group job fails due to too little memory or run 
+time, its compute resources can be updated in this file. 
+
+> Note that compute resources are specified in three places in 
+the configuration file: 1) under `set-threads` (used by Snakemake 
+to specify threads in rules), 2) under `set-resources` and therein 
+under `mem_mb`, specifying the memory in Megabytes (multiplying 
+the number of threads with the available memory per thread), 
+and 3) under `set-resources` and therein under `cpus_per_task` 
+(the same number as specified under `set-threads`, required for 
+correct memory assignment on Dardel). 
+
+5) Start GenErode as follows:
+
+- Open a tmux session (alternatively, you can use screen)
+
+- Activate the GenErode conda environment (create or update 
+from `environment.yaml`), replacing the path to the location 
+of the conda environment:
+
+```
+export CONDA_ENVS_PATH=/cfs/klemming/home/.../
+conda activate generode
+```
+
+- Start the dry run:
+
+```
+snakemake --profile slurm -n &> YYMMDD_dry.out
+```
+
+- Start the main run:
+
+```
+snakemake --profile slurm &> YYMMDD_main.out
+```
+
+> Useful flags for running the pipeline: `--ri` to re-run 
+incomplete jobs and `-k` to keep going in case a job fails. 
diff --git a/utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml b/utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml
deleted file mode 100644
index c2e8b05..0000000
--- a/utilities/mutational_load_snpeff/slurm/profile/config_plugin.yaml
+++ /dev/null
@@ -1,53 +0,0 @@
-# Configuration file for slurm plugin (Snakemake >8.0.0) for Dardel cluster at PDC/KTH
-# snakemake CLI flags
-executor: slurm
-jobs: 100
-cores: 1
-printshellcmds: true
-software-deployment-method: apptainer
-
-# slurm resources
-## default-resources: applied to all jobs, overruled by resources defined below for jobs
-default-resources: 
-  slurm_account: XXX-XX-XXX # update this to your slurm account
-  slurm_partition: shared # use Dardel’s shared partition
-  runtime: 120 # default runtime in minutes
-  mem_mb: 8000
-  nodes: 1 # one node on Dardel from the shared partition
-  ntasks: 1 # number of concurrent tasks / ranks
-  cpus_per_task: 8 # number of hyperthreads per task, corresponds to 1 GB RAM
-set-threads: # map rule names to threads
-  - extract_number_of_samples=16
-  - find_fixed_homozygote_alt_sites=32
-  - remove_fixed_homozygote_alt_sites_merged_vcf=32
-  - find_intron_intergenic_variants=16
-  - remove_sites_snpEff_vcf=32
-  - extract_high_impact_snps=16
-  - extract_moderate_impact_snps=16
-  - extract_low_impact_snps=16
-  - extract_synonymous_variant_snps=16
-  - total_load=8
-  - realised_load=8
-set-resources: # map rule names to resources in general
-  - extract_number_of_samples:mem_mb=16000
-  - extract_number_of_samples:runtime=30
-  - find_fixed_homozygote_alt_sites:mem_mb=32000
-  - find_fixed_homozygote_alt_sites:runtime=300
-  - remove_fixed_homozygote_alt_sites_merged_vcf:mem_mb=32000
-  - remove_fixed_homozygote_alt_sites_merged_vcf:runtime=300
-  - find_intron_intergenic_variants:mem_mb=16000
-  - find_intron_intergenic_variants:runtime=300
-  - remove_sites_snpEff_vcf:mem_mb=32000
-  - remove_sites_snpEff_vcf:runtime=300
-  - extract_high_impact_snps:mem_mb=16000
-  - extract_high_impact_snps:runtime=120
-  - extract_moderate_impact_snps:mem_mb=16000
-  - extract_moderate_impact_snps:runtime=120
-  - extract_low_impact_snps:mem_mb=16000
-  - extract_low_impact_snps:runtime=120
-  - extract_synonymous_variant_snps:mem_mb=16000
-  - extract_synonymous_variant_snps:runtime=120
-  - total_load:mem_mb=6400
-  - total_load:runtime=30
-  - realised_load:mem_mb=6400
-  - realised_load:runtime=30
\ No newline at end of file
diff --git a/utilities/mutational_load_snpeff/slurm/profile/config_plugin_dardel.yaml b/utilities/mutational_load_snpeff/slurm/profile/config_plugin_dardel.yaml
new file mode 100644
index 0000000..32cb4b9
--- /dev/null
+++ b/utilities/mutational_load_snpeff/slurm/profile/config_plugin_dardel.yaml
@@ -0,0 +1,74 @@
+# Configuration file for slurm plugin (Snakemake >8.0.0) for Dardel cluster at PDC/KTH
+# snakemake CLI flags
+executor: slurm
+jobs: 100
+printshellcmds: true
+software-deployment-method: apptainer
+
+# slurm resources
+## default-resources: applied to all jobs, overruled by resources defined below for jobs
+default-resources: 
+  slurm_account: XXX-XX-XXX # update this to your slurm account
+  slurm_partition: shared # use Dardel’s shared partition
+  runtime: 120 # default runtime in minutes
+  mem_mb: 8000
+  nodes: 1 # one node on Dardel from the shared partition
+  ntasks: 1 # number of concurrent tasks / ranks
+  cpus_per_task: 8 # number of hyperthreads per task; each hyperthread corresponds to ~1 GB RAM
+
+## map rule names to threads
+set-threads:
+  extract_number_of_samples: 16
+  find_fixed_homozygote_alt_sites: 32
+  remove_fixed_homozygote_alt_sites_merged_vcf: 32
+  find_intron_intergenic_variants: 16
+  remove_sites_snpEff_vcf: 32
+  extract_high_impact_snps: 16
+  extract_moderate_impact_snps: 16
+  extract_low_impact_snps: 16
+  extract_synonymous_variant_snps: 16
+  total_load: 8
+  realised_load: 8
+
+## set-resources: map rule names to resources in general
+set-resources:
+  extract_number_of_samples:
+    mem_mb: 16000
+    runtime: 30
+    cpus_per_task: 16
+  find_fixed_homozygote_alt_sites:
+    mem_mb: 32000
+    runtime: 300
+    cpus_per_task: 32
+  remove_fixed_homozygote_alt_sites_merged_vcf:
+    mem_mb: 32000
+    runtime: 300
+    cpus_per_task: 32
+  find_intron_intergenic_variants:
+    mem_mb: 16000
+    runtime: 300
+    cpus_per_task: 16
+  remove_sites_snpEff_vcf:
+    mem_mb: 32000
+    runtime: 300
+    cpus_per_task: 32
+  extract_high_impact_snps:
+    mem_mb: 16000
+    cpus_per_task: 16
+  extract_moderate_impact_snps:
+    mem_mb: 16000
+    cpus_per_task: 16
+  extract_low_impact_snps:
+    mem_mb: 16000
+    cpus_per_task: 16
+  extract_synonymous_variant_snps:
+    mem_mb: 16000
+    cpus_per_task: 16
+  total_load:
+    mem_mb: 8000
+    runtime: 30
+    cpus_per_task: 8
+  realised_load:
+    mem_mb: 8000
+    runtime: 30
+    cpus_per_task: 8

From f842973ce902d832d6e244dbad81d7e6bf1062e0 Mon Sep 17 00:00:00 2001
From: verku <verena.kutschera@scilifelab.se>
Date: Wed, 19 Jun 2024 17:45:28 +0200
Subject: [PATCH 2/2] Move general instructions to WIKI

---
 config/slurm/README.md                        | 58 +------------------
 .../slurm/profile/README.md                   | 55 +-----------------
 2 files changed, 4 insertions(+), 109 deletions(-)

diff --git a/config/slurm/README.md b/config/slurm/README.md
index 04845f2..a2337ae 100644
--- a/config/slurm/README.md
+++ b/config/slurm/README.md
@@ -1,60 +1,6 @@
-# GenErode execution on SLURM clusters
+# GenErode execution on Dardel (PDC/KTH)
 
-With the switch to Snakemake version 8, GenErode can be run 
-the following on SLURM clusters:
-
-1) Create the GenErode conda environment or update an earlier 
-version. The latest conda environment contains the Snakemake 
-executor plugin for slurm:
-
-```
-conda create -f environment.yaml -n generode
-```
-
-2) Copy one of the example configuration files `config/slurm/profile/config_plugin_rackham.yaml` 
-or `config/slurm/profile/config_plugin_dardel.yaml` to 
-`slurm/config.yaml`. This file specifies compute resources 
-for each rule or group jobs. Any rule or group job that is 
-not listed under `set-threads` or `set-resources` uses 
-default resources specified under `default-resources`. If 
-any rule or group jobs fail due to too little memory or run 
-time, their compute resources can be updated in this file. 
-
-> Note that the current configuration files were adjusted to the 
-HPC clusters Rackham from UPPMAX and Dardel from PDC/KTH. Details 
-on how to configure and run GenErode on Dardel are provided below. 
-Memory requirements are specified three times in these configuration 
-files: 1) under `set-threads` (used by Snakemake to specify threads 
-in rules), 2) under `set-resources` and therein under `mem_mb`, 
-specifying the memory in Megabytes (multiplying the number of threads 
-with the available memory per thread), and 3) under `set-resources` 
-and therein under `cpus-per-task` (the same number as specified under 
-`set-threads`, required for correct memory assignment on Dardel). The 
-configuration file for Snakemake version 7 was kept for comparison, 
-which was also written for Rackham/UPPMAX. 
-
-3) Start GenErode the following:
-
-- Open a tmux or screen session
-- Activate the GenErode conda environment
-- Start the dry run:
-
-```
-snakemake --profile slurm -np &> YYMMDD_dry.out
-```
-
-- Start the main run:
-
-```
-snakemake --profile slurm &> YYMMDD_main.out
-```
-
-> Useful flags for running the pipeline: `--ri` to re-run 
-incomplete jobs and `-k` to keep going in case a job fails. 
-
-## Specific instructions for Dardel
-
-1) Load the following modules on Dardel:
+1) Load the following modules:
 
 ```
 module load PDC UPPMAX bioinfo-tools conda singularity tmux
diff --git a/utilities/mutational_load_snpeff/slurm/profile/README.md b/utilities/mutational_load_snpeff/slurm/profile/README.md
index bfcdfc5..46f4da3 100644
--- a/utilities/mutational_load_snpeff/slurm/profile/README.md
+++ b/utilities/mutational_load_snpeff/slurm/profile/README.md
@@ -1,57 +1,6 @@
-# GenErode execution on SLURM clusters
+# GenErode execution on Dardel
 
-With the switch to Snakemake version 8, GenErode can be run 
-the following on SLURM clusters:
-
-1) Create the GenErode conda environment or update an earlier 
-version. The latest conda environment contains the Snakemake 
-executor plugin for slurm:
-
-```
-conda create -f environment.yaml -n generode
-```
-
-2) Copy the example configuration file `slurm/profile/config_plugin_dardel.yaml` 
-to `slurm/config.yaml`. This file specifies compute resources 
-for each rule or group jobs. Any rule or group job that is 
-not listed under `set-threads` or `set-resources` uses 
-default resources specified under `default-resources`. If 
-any rule or group jobs fail due to too little memory or run 
-time, their compute resources can be updated in this file. 
-
-> Note that the current configuration file was adjusted to the 
-HPC cluster Dardel from PDC/KTH. Details on how to configure and 
-run GenErode on Dardel are provided below. Memory requirements are 
-specified three times in the configuration file: 1) under 
-`set-threads` (used by Snakemake to specify threads in rules), 2) 
-under `set-resources` and therein under `mem_mb`, specifying the 
-memory in Megabytes (multiplying the number of threads with the 
-available memory per thread), and 3) under `set-resources` and 
-therein under `cpus-per-task` (the same number as specified under 
-`set-threads`, required for correct memory assignment on Dardel). 
-
-3) Start GenErode the following:
-
-- Open a tmux or screen session
-- Activate the GenErode conda environment
-- Start the dry run:
-
-```
-snakemake --profile slurm -n &> YYMMDD_dry.out
-```
-
-- Start the main run:
-
-```
-snakemake --profile slurm &> YYMMDD_main.out
-```
-
-> Useful flags for running the pipeline: `--ri` to re-run 
-incomplete jobs and `-k` to keep going in case a job fails. 
-
-## Specific instructions for Dardel
-
-1) Load the following modules on Dardel:
+1) Load the following modules:
 
 ```
 module load PDC UPPMAX bioinfo-tools conda singularity tmux