From 20de27c32f5520cf7cf7dfef19fcc88c2b7bfaf9 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 28 Nov 2023 09:16:22 -0500 Subject: [PATCH 1/7] move fastmnn and harmony functions into new file in modules --- integrate.nf | 52 --------------------------------------- modules/integrate-sces.nf | 51 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 52 deletions(-) create mode 100644 modules/integrate-sces.nf diff --git a/integrate.nf b/integrate.nf index 11160553..8600cdaf 100644 --- a/integrate.nf +++ b/integrate.nf @@ -54,57 +54,6 @@ process merge_sce { } -// integrate with fastMNN -process integrate_fastmnn { - container params.SCPCATOOLS_CONTAINER - label 'mem_16' - label 'cpus_4' - input: - tuple val(integration_group), path(merged_sce_file) - output: - tuple val(integration_group), path(integrated_sce_file) - script: - integrated_sce_file = "${integration_group}.rds" - """ - integrate_sce.R \ - --input_sce_file "${merged_sce_file}" \ - --output_sce_file "${integrated_sce_file}" \ - --method "fastMNN" \ - --seed ${params.seed} \ - --threads ${task.cpus} - """ - stub: - integrated_sce_file = "${integration_group}.rds" - """ - touch ${integrated_sce_file} - """ -} - -// integrate with fastMNN -process integrate_harmony { - container params.SCPCATOOLS_CONTAINER - publishDir "${params.results_dir}/integration/${integration_group}" - label 'mem_16' - input: - tuple val(integration_group), path(merged_sce_file) - output: - tuple val(integration_group), path(integrated_sce_file) - script: - integrated_sce_file = "${integration_group}.rds" - """ - integrate_sce.R \ - --input_sce_file "${merged_sce_file}" \ - --output_sce_file "${integrated_sce_file}" \ - --method "harmony" \ - --seed ${params.seed} - """ - stub: - integrated_sce_file = "${integration_group}.rds" - """ - touch ${integrated_sce_file} - """ -} - // create integrated report and single object process integration_report { container params.SCPCATOOLS_CONTAINER @@ -180,4 +129,3 @@ workflow { // generate integration report integration_report(integrate_harmony.out, file(integration_template)) } - diff --git a/modules/integrate-sces.nf b/modules/integrate-sces.nf new file mode 100644 index 00000000..61b9165a --- /dev/null +++ b/modules/integrate-sces.nf @@ -0,0 +1,51 @@ + +// Process to integrate SCEs with fastMNN +process integrate_fastmnn { + container params.SCPCATOOLS_CONTAINER + label 'mem_16' + label 'cpus_4' + input: + tuple val(integration_group), path(merged_sce_file) + output: + tuple val(integration_group), path(integrated_sce_file) + script: + integrated_sce_file = "${integration_group}.rds" + """ + integrate_sce.R \ + --input_sce_file "${merged_sce_file}" \ + --output_sce_file "${integrated_sce_file}" \ + --method "fastMNN" \ + --seed ${params.seed} \ + --threads ${task.cpus} + """ + stub: + integrated_sce_file = "${integration_group}.rds" + """ + touch ${integrated_sce_file} + """ +} + +// Process to integrate SCEs with Harmony +process integrate_harmony { + container params.SCPCATOOLS_CONTAINER + publishDir "${params.results_dir}/integration/${integration_group}" + label 'mem_16' + input: + tuple val(integration_group), path(merged_sce_file) + output: + tuple val(integration_group), path(integrated_sce_file) + script: + integrated_sce_file = "${integration_group}.rds" + """ + integrate_sce.R \ + --input_sce_file "${merged_sce_file}" \ + --output_sce_file "${integrated_sce_file}" \ + --method "harmony" \ + --seed ${params.seed} + """ + stub: + integrated_sce_file = "${integration_group}.rds" + """ + touch ${integrated_sce_file} + """ +} From 237b76531b6339a10ab646e46fc3c1af0379e49c Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 28 Nov 2023 09:17:02 -0500 Subject: [PATCH 2/7] add comment that they are unused right now --- modules/integrate-sces.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/integrate-sces.nf b/modules/integrate-sces.nf index 61b9165a..069451f4 100644 --- a/modules/integrate-sces.nf +++ b/modules/integrate-sces.nf @@ -1,3 +1,4 @@ +// These processes are not currently used by any workflow. // Process to integrate SCEs with fastMNN process integrate_fastmnn { From 6af22b64a4191cde98ba97427384f7d3814edcff Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 28 Nov 2023 10:57:52 -0500 Subject: [PATCH 3/7] rename workflow file --- integrate.nf => merge.nf | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename integrate.nf => merge.nf (100%) diff --git a/integrate.nf b/merge.nf similarity index 100% rename from integrate.nf rename to merge.nf From 7ffc2c24de18bbce86176af2d373ebfcc1a7a483 Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 28 Nov 2023 11:04:23 -0500 Subject: [PATCH 4/7] Update code with merge language instead of integrate, and leave some TODOs where those updates are on likely deprecated code --- merge.nf | 76 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/merge.nf b/merge.nf index 8600cdaf..0bc096ca 100644 --- a/merge.nf +++ b/merge.nf @@ -1,12 +1,18 @@ #!/usr/bin/env nextflow nextflow.enable.dsl=2 -// integration specific parameters -params.integration_metafile = 's3://ccdl-scpca-data/sample_info/scpca-integration-metadata.tsv' -params.integration_group = "All" +// Workflow to merge SCE objects into a single object. +// This workflow does NOT perform integration, i.e. batch correction. -// define path to integration template -integration_template = "${projectDir}/templates/integration-report.Rmd" + +// merge-specific parameters +// TODO: Update approach to define merge groupings. +params.merge_metafile = 's3://ccdl-scpca-data/sample_info/scpca-integration-metadata.tsv' +params.merge_group = "All" + +// define path to merge template +// TODO: Establish this merge-report.Rmd file +//merge_template = "${projectDir}/templates/merge-report.Rmd" // parameter checks param_error = false @@ -16,8 +22,8 @@ if (!file(params.run_metafile).exists()) { param_error = true } -if (!file(params.integration_metafile).exists()) { - log.error("The 'integration_metafile' file '${params.integration_metafile}' can not be found.") +if (!file(params.merge_metafile).exists()) { + log.error("The 'merge_metafile' file '${params.merge_metafile}' can not be found.") param_error = true } @@ -29,15 +35,15 @@ if(param_error){ process merge_sce { container params.SCPCATOOLS_CONTAINER label 'mem_16' - publishDir "${params.checkpoints_dir}/merged_sces" + publishDir "${params.checkpoints_dir}/merged" input: - tuple val(integration_group), val(library_ids), path(scpca_nf_file) + tuple val(merge_group), val(library_ids), path(scpca_nf_file) output: - tuple val(integration_group), path(merged_sce_file) + tuple val(merge_group), path(merged_sce_file) script: input_library_ids = library_ids.join(',') input_sces = scpca_nf_file.join(',') - merged_sce_file = "${integration_group}_merged.rds" + merged_sce_file = "${merge_group}_merged.rds" """ merge_sces.R \ --input_library_ids "${input_library_ids}" \ @@ -47,31 +53,31 @@ process merge_sce { --threads ${task.cpus} """ stub: - merged_sce_file = "${integration_group}_merged.rds" + merged_sce_file = "${merge_group}_merged.rds" """ touch ${merged_sce_file} """ } -// create integrated report and single object -process integration_report { +// create merge report and single object +process merge_report { container params.SCPCATOOLS_CONTAINER - publishDir "${params.results_dir}/integration/${integration_group}" + publishDir "${params.results_dir}/merged/${merge_group}" label 'mem_16' input: - tuple val(integration_group), path(integrated_sce_file) + tuple val(merge_group), path(merged_sce_file) path(report_template) output: - path(integration_report) + path(merge_report) script: - integration_report = "${integration_group}_summary_report.html" + merge_report = "${merge_group}_summary_report.html" """ Rscript -e "rmarkdown::render( \ '${report_template}', \ - output_file = '${integration_report}', \ - params = list(integration_group = '${integration_group}', \ - integrated_sce = '${integrated_sce_file}', \ + output_file = '${merge_report}', \ + params = list(merge_group = '${merge_group}', \ + merged_sce = '${merged_sce_file}', \ batch_column = 'library_id') \ )" """ @@ -81,18 +87,18 @@ process integration_report { workflow { // select projects to integrate from params - integration_groups = params.integration_group?.tokenize(',') ?: [] - integration_groups_all = integration_groups[0] == "All" // create logical for including all groups or not when filtering later + merge_groups = params.merge_group?.tokenize(',') ?: [] + merge_groups_all = merge_groups[0] == "All" // create logical for including all groups or not when filtering later // create channel of integration group and libraries to integrate - integration_meta_ch = Channel.fromPath(params.integration_metafile) + merge_meta_ch = Channel.fromPath(params.merge_metafile) .splitCsv(header: true, sep: '\t') .map{[ library_id: it.scpca_library_id, - integration_group: it.integration_group, + merge_group: it.merge_group, submitter: it.submitter ]} - .filter{integration_groups_all || (it.integration_group in integration_groups)} + .filter{merge_groups_all || (it.merge_group in merge_groups)} // channel with run metadata, keeping only the columns we need libraries_ch = Channel.fromPath(params.run_metafile) @@ -105,27 +111,21 @@ workflow { ]} .unique() - grouped_meta_ch = integration_meta_ch - .map{[it.library_id, it.integration_group]} + grouped_meta_ch = merge_meta_ch + .map{[it.library_id, it.merge_group]} // pull out library_id from meta and use to join .combine(libraries_ch.map{[it.library_id, it.scpca_nf_file]}, by: 0) // create tuple of integration group, library ID, and output file from scpca_nf .map{[ - it[1], // integration_group + it[1], // merge_group it[0], // library_id file(it[2]) // scpca_nf_file ]} - // grouped tuple of [integration_group, [library_id1, library_id2, ...], [sce_file1, sce_file2, ...]] + // grouped tuple of [merge_group, [library_id1, library_id2, ...], [sce_file1, sce_file2, ...]] .groupTuple(by: 0) merge_sce(grouped_meta_ch) - // integrate using fastmnn - integrate_fastmnn(merge_sce.out) - - // integrate using harmony - integrate_harmony(integrate_fastmnn.out) - - // generate integration report - integration_report(integrate_harmony.out, file(integration_template)) + // TODO: generate merge report + //merge_report(merge_sce.out, file(merge_template)) } From a52d2965e2f864edc92794cd63fd22c16968819d Mon Sep 17 00:00:00 2001 From: "Stephanie J. Spielman" Date: Tue, 28 Nov 2023 11:07:26 -0500 Subject: [PATCH 5/7] replace a few more integration words with merge --- merge.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/merge.nf b/merge.nf index 0bc096ca..5fb3c0e2 100644 --- a/merge.nf +++ b/merge.nf @@ -86,11 +86,11 @@ process merge_report { workflow { - // select projects to integrate from params + // select projects to merge from params merge_groups = params.merge_group?.tokenize(',') ?: [] merge_groups_all = merge_groups[0] == "All" // create logical for including all groups or not when filtering later - // create channel of integration group and libraries to integrate + // create channel of merge group and libraries to merge merge_meta_ch = Channel.fromPath(params.merge_metafile) .splitCsv(header: true, sep: '\t') .map{[ @@ -115,7 +115,7 @@ workflow { .map{[it.library_id, it.merge_group]} // pull out library_id from meta and use to join .combine(libraries_ch.map{[it.library_id, it.scpca_nf_file]}, by: 0) - // create tuple of integration group, library ID, and output file from scpca_nf + // create tuple of merge group, library ID, and output file from scpca_nf .map{[ it[1], // merge_group it[0], // library_id From 7b4125aa9ad74ed4fadebe6d35b55fee02a30f3a Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 4 Dec 2023 08:14:26 -0500 Subject: [PATCH 6/7] Update merge.nf Co-authored-by: Joshua Shapiro --- merge.nf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/merge.nf b/merge.nf index 5fb3c0e2..a922bc73 100644 --- a/merge.nf +++ b/merge.nf @@ -81,7 +81,11 @@ process merge_report { batch_column = 'library_id') \ )" """ - + stub: + merge_report = "${merge_group}_summary_report.html" + """ + touch ${merge_report} + """ } workflow { From 90197cfde4fd97b6bee59d8026dfc66189d5e7cc Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Mon, 4 Dec 2023 08:17:39 -0500 Subject: [PATCH 7/7] Apply suggestions from code review --- merge.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merge.nf b/merge.nf index a922bc73..3c5c2aaa 100644 --- a/merge.nf +++ b/merge.nf @@ -60,7 +60,7 @@ process merge_sce { } -// create merge report and single object +// create merge report process merge_report { container params.SCPCATOOLS_CONTAINER publishDir "${params.results_dir}/merged/${merge_group}"