Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move out integration processes #597

Merged
merged 10 commits into from
Dec 4, 2023
183 changes: 0 additions & 183 deletions integrate.nf
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to rename this while you are at it, since it isn't integration at all now?

This file was deleted.

131 changes: 131 additions & 0 deletions merge.nf
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently GitHub felt this update was sufficiently different to call it a whole new file instead of a diff!

Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl=2

// Workflow that combines individual SCE objects into one merged object.
// No integration (i.e. batch correction) is performed by this workflow.


// Parameters specific to merging
// TODO: Update approach to define merge groupings.
params.merge_metafile = 's3://ccdl-scpca-data/sample_info/scpca-integration-metadata.tsv'
params.merge_group = "All"

// Location of the merge report template
// TODO: Establish this merge-report.Rmd file
//merge_template = "${projectDir}/templates/merge-report.Rmd"

// Parameter checks: every metadata file named below must exist.
// All missing files are reported before the run is aborted, so a single
// failed launch lists every problem at once.
def required_metafiles = [
  run_metafile: params.run_metafile,
  merge_metafile: params.merge_metafile
]

param_error = false

required_metafiles.each { param_name, metafile_path ->
  if (!file(metafile_path).exists()) {
    log.error("The '${param_name}' file '${metafile_path}' can not be found.")
    param_error = true
  }
}

// stop with a non-zero exit status if any check failed
if (param_error) {
  System.exit(1)
}

// merge individual SCE objects into one SCE object
process merge_sce {
  // Combines the SCE files of one merge group into a single RDS file by
  // calling merge_sces.R, and publishes it under the checkpoints directory.
  //
  // input:  [merge_group, library_ids, scpca_nf_file] where library_ids and
  //         scpca_nf_file are passed to the script as comma-separated lists
  // output: [merge_group, "<merge_group>_merged.rds"]
  container params.SCPCATOOLS_CONTAINER
  label 'mem_16'
  // The script is given --threads ${task.cpus}; request the same CPU label
  // that integrate_fastmnn uses alongside its --threads argument so the
  // thread count is not left at the default task.cpus value.
  label 'cpus_4'
  publishDir "${params.checkpoints_dir}/merged"
  input:
    tuple val(merge_group), val(library_ids), path(scpca_nf_file)
  output:
    tuple val(merge_group), path(merged_sce_file)
  script:
    // the R script takes comma-separated lists on the command line
    input_library_ids = library_ids.join(',')
    input_sces = scpca_nf_file.join(',')
    merged_sce_file = "${merge_group}_merged.rds"
    """
    merge_sces.R \
      --input_library_ids "${input_library_ids}" \
      --input_sce_files "${input_sces}" \
      --output_sce_file "${merged_sce_file}" \
      --n_hvg ${params.num_hvg} \
      --threads ${task.cpus}
    """
  stub:
    // create an empty placeholder with the same name as the real output
    merged_sce_file = "${merge_group}_merged.rds"
    """
    touch ${merged_sce_file}
    """

}

// render an HTML summary report from a merged SCE object
sjspielman marked this conversation as resolved.
Show resolved Hide resolved
process merge_report {
  // Renders an R Markdown template into "<merge_group>_summary_report.html"
  // and publishes it under "<results_dir>/merged/<merge_group>".
  //
  // input:  [merge_group, merged_sce_file] plus the report template file
  // output: the rendered HTML report
  container params.SCPCATOOLS_CONTAINER
  publishDir "${params.results_dir}/merged/${merge_group}"
  label 'mem_16'
  input:
    tuple val(merge_group), path(merged_sce_file)
    path(report_template)
  output:
    path(merge_report_file)
  script:
    // named differently from the process itself to avoid shadowing `merge_report`
    merge_report_file = "${merge_group}_summary_report.html"
    """
    Rscript -e "rmarkdown::render( \
      '${report_template}', \
      output_file = '${merge_report_file}', \
      params = list(merge_group = '${merge_group}', \
                    merged_sce = '${merged_sce_file}', \
                    batch_column = 'library_id') \
    )"
    """
  stub:
    // create an empty placeholder with the same name as the real output,
    // matching the stub sections of the other processes
    merge_report_file = "${merge_group}_summary_report.html"
    """
    touch ${merge_report_file}
    """
}

workflow {

  // merge groups requested through params, split on commas (empty list if unset)
  def requested_groups = params.merge_group?.tokenize(',') ?: []
  // true when the first requested group is "All"; used to bypass the group filter below
  def include_all_groups = requested_groups[0] == "All"

  // one record per row of the merge metadata file, restricted to the requested groups
  merge_meta_ch = Channel.fromPath(params.merge_metafile)
    .splitCsv(header: true, sep: '\t')
    .map{ row ->
      [
        library_id: row.scpca_library_id,
        merge_group: row.merge_group,
        submitter: row.submitter
      ]
    }
    .filter{ meta -> include_all_groups || (meta.merge_group in requested_groups) }

  // run metadata reduced to the library ID and the path of its processed RDS file
  libraries_ch = Channel.fromPath(params.run_metafile)
    .splitCsv(header: true, sep: '\t')
    // only include single-cell/single-nuclei and make sure no CITE-seq/ hashing libraries
    .filter{ row -> row.seq_unit in ['cell', 'nucleus'] }
    .map{ row ->
      [
        library_id: row.scpca_library_id,
        scpca_nf_file: "${params.results_dir}/${row.scpca_project_id}/${row.scpca_sample_id}/${row.scpca_library_id}_processed.rds"
      ]
    }
    .unique()

  // [library_id, scpca_nf_file] pairs, keyed on library_id for combining
  library_files_ch = libraries_ch
    .map{ lib -> [lib.library_id, lib.scpca_nf_file] }

  grouped_meta_ch = merge_meta_ch
    .map{ meta -> [meta.library_id, meta.merge_group] }
    // pair each library's merge group with its file, matching on library_id
    .combine(library_files_ch, by: 0)
    // reorder to [merge_group, library_id, scpca_nf_file]
    .map{ library_id, merge_group, scpca_nf_file ->
      [merge_group, library_id, file(scpca_nf_file)]
    }
    // grouped tuple of [merge_group, [library_id1, library_id2, ...], [sce_file1, sce_file2, ...]]
    .groupTuple(by: 0)

  merge_sce(grouped_meta_ch)

  // TODO: generate merge report
  //merge_report(merge_sce.out, file(merge_template))
}
52 changes: 52 additions & 0 deletions modules/integrate-sces.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// These processes are not currently used by any workflow.

// Process to integrate SCEs with fastMNN
process integrate_fastmnn {
  // Runs integrate_sce.R with --method "fastMNN" on a merged SCE file.
  //
  // input:  [integration_group, merged_sce_file]
  // output: [integration_group, "<integration_group>.rds"]
  container params.SCPCATOOLS_CONTAINER
  label 'mem_16'
  label 'cpus_4'
  input:
    tuple val(integration_group), path(merged_sce_file)
  output:
    tuple val(integration_group), path(fastmnn_sce_file)
  script:
    fastmnn_sce_file = "${integration_group}.rds"
    """
    integrate_sce.R \
      --input_sce_file "${merged_sce_file}" \
      --output_sce_file "${fastmnn_sce_file}" \
      --method "fastMNN" \
      --seed ${params.seed} \
      --threads ${task.cpus}
    """
  stub:
    // empty placeholder carrying the same name as the real output
    fastmnn_sce_file = "${integration_group}.rds"
    """
    touch ${fastmnn_sce_file}
    """
}

// Process to integrate SCEs with Harmony
process integrate_harmony {
  // Runs integrate_sce.R with --method "harmony" on a merged SCE file and
  // publishes the result under "<results_dir>/integration/<integration_group>".
  //
  // input:  [integration_group, merged_sce_file]
  // output: [integration_group, "<integration_group>.rds"]
  container params.SCPCATOOLS_CONTAINER
  publishDir "${params.results_dir}/integration/${integration_group}"
  label 'mem_16'
  input:
    tuple val(integration_group), path(merged_sce_file)
  output:
    tuple val(integration_group), path(harmony_sce_file)
  script:
    harmony_sce_file = "${integration_group}.rds"
    """
    integrate_sce.R \
      --input_sce_file "${merged_sce_file}" \
      --output_sce_file "${harmony_sce_file}" \
      --method "harmony" \
      --seed ${params.seed}
    """
  stub:
    // empty placeholder carrying the same name as the real output
    harmony_sce_file = "${integration_group}.rds"
    """
    touch ${harmony_sce_file}
    """
}