Skip to content

Commit

Permalink
[Fix gprofiler] Empty output table is now always added (nf-core#4583)
Browse files Browse the repository at this point in the history
* Bugfix: the empty output table is now always added

* Replaced multiple contrast params with one prefix param
  • Loading branch information
WackerO authored Dec 13, 2023
1 parent 4be1afb commit ef6053b
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 50 deletions.
23 changes: 11 additions & 12 deletions modules/nf-core/gprofiler2/gost/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process GPROFILER2_GOST {
tag "$meta"
tag "$meta.id"
label 'process_single'

conda "${moduleDir}/environment.yml"
Expand All @@ -8,21 +8,20 @@ process GPROFILER2_GOST {
'biocontainers/mulled-v2-3712554873398d849d0d11b22440f41febbc4ede:aa19bb8afc0ec6456a4f3cd650f7577c3bbdd4f3-0' }"

input:
tuple val(meta), val(contrast_variable), val(reference), val(target)
tuple val(meta1), path(de_file)
tuple val(meta), path(de_file)
path(gmt_file)
path(background_file)

output:
tuple val(meta), path("*gprofiler2.*all_enriched_pathways.tsv") , emit: all_enrich
tuple val(meta), path("*gprofiler2.*gost_results.rds") , emit: rds , optional: true
tuple val(meta), path("*gprofiler2.*gostplot.png") , emit: plot_png , optional: true
tuple val(meta), path("*gprofiler2.*gostplot.html") , emit: plot_html , optional: true
tuple val(meta), path("*gprofiler2.*sub_enriched_pathways.tsv") , emit: sub_enrich , optional: true
tuple val(meta), path("*gprofiler2.*sub_enriched_pathways.png") , emit: sub_plot , optional: true
tuple val(meta), path("*ENSG_filtered.gmt") , emit: filtered_gmt, optional: true
tuple val(meta), path("*R_sessionInfo.log") , emit: session_info
path "versions.yml" , emit: versions
tuple val(meta), path("*.gprofiler2.all_enriched_pathways.tsv") , emit: all_enrich
tuple val(meta), path("*.gprofiler2.gost_results.rds") , emit: rds , optional: true
tuple val(meta), path("*.gprofiler2.gostplot.png") , emit: plot_png , optional: true
tuple val(meta), path("*.gprofiler2.gostplot.html") , emit: plot_html , optional: true
tuple val(meta), path("*.gprofiler2.*.sub_enriched_pathways.tsv") , emit: sub_enrich , optional: true
tuple val(meta), path("*.gprofiler2.*.sub_enriched_pathways.png") , emit: sub_plot , optional: true
tuple val(meta), path("*ENSG_filtered.gmt") , emit: filtered_gmt, optional: true
tuple val(meta), path("*R_sessionInfo.log") , emit: session_info
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand Down
27 changes: 9 additions & 18 deletions modules/nf-core/gprofiler2/gost/templates/gprofiler2_gost.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,7 @@ opt <- list(
de_id_column = 'gene_id',
organism = NULL,
sources = NULL,
contrast_variable = '$contrast_variable',
reference_level = '$reference',
target_level = '$target',
blocking_variables = NULL,
output_prefix = ifelse('$task.ext.prefix' == 'null', '$meta.id', '$task.ext.prefix'),
significant = T,
measure_underrepresentation = F,
correction_method = 'gSCS',
Expand Down Expand Up @@ -162,9 +159,8 @@ for ( ao in names(args_opt)) {
opt[[ao]] <- args_opt[[ao]]
}
}

# Check if required parameters have been provided
required_opts <- c('contrast_variable', 'reference_level', 'target_level')
required_opts <- c('output_prefix')
missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)]

if (length(missing) > 0) {
Expand Down Expand Up @@ -206,13 +202,10 @@ de.genes <-
file = opt\$de_file
)

# Create prefix from contrast var, reference and target as well as blocking (if provided)
contrast_name <- paste(opt\$contrast_variable, opt\$reference_level, opt\$target_level, sep = '_')
output_prefix <- paste('gprofiler2', contrast_name, sep = '.')
if (!is.null(opt\$blocking_variables)) {
blocking_variables <- paste(make.names(unlist(strsplit(opt\$blocking_variables, split = ','))), collapse = '_')
output_prefix <- paste(output_prefix, blocking_variables, sep= '_')
}
output_prefix <- paste0(opt\$output_prefix, ".gprofiler2")

# Create the empty output table up front, before any analysis, as it is a mandatory (non-optional) output
file.create(paste(output_prefix, 'all_enriched_pathways', 'tsv', sep = '.'))

if (nrow(de.genes) > 0) {

Expand Down Expand Up @@ -328,8 +321,7 @@ if (nrow(de.genes) > 0) {

# Name the query explicitly; otherwise it is called 'query_1', and that name also determines the gostplot title
q <- list(query)
names(q) <- c(paste0(contrast_name, ifelse(!is.null(opt\$blocking_variables), paste0("_", blocking_variables), "")))

names(q) <- c(output_prefix)
gost_results <- gost(
query=q,
organism=token,
Expand Down Expand Up @@ -365,7 +357,7 @@ if (nrow(de.genes) > 0) {

# R object for other processes to use

saveRDS(gost_results, file = paste(opt\$contrast_variable, 'gprofiler2.gost_results.rds', sep = '.'))
saveRDS(gost_results, file = paste(output_prefix, 'gost_results.rds', sep = '.'))

# Write full enrichment table (except parents column as that one throws an error)

Expand All @@ -385,6 +377,7 @@ if (nrow(de.genes) > 0) {

# Iterate over the enrichment results by source and save separate tables
for (df in split(gost_results\$result, gost_results\$result\$source)){

db_source <- df\$source[1]
df_subset <- data.frame(
Pathway_name = df\$term_name,
Expand Down Expand Up @@ -430,8 +423,6 @@ if (nrow(de.genes) > 0) {
}
}
} else {
# Create empty output table as it is a mandatory output
file.create(paste(output_prefix, 'all_enriched_pathways', 'tsv', sep = '.'))
print("No differential features found, pathway enrichment analysis with gprofiler2 will be skipped.")
}

Expand Down
6 changes: 2 additions & 4 deletions tests/modules/nf-core/gprofiler2/gost/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@ include { GPROFILER2_GOST } from '../../../../../modules/nf-core/gprofiler2/gost
workflow test_gprofiler2_gost {
contrasts = [ [ id:'test', reference:'r', target:'t' ], 'test', 'r', 't' ]
input = [
[ id:'test', reference:'r', target:'t' ], // meta map
[ id:'test_r_t', reference:'r', target:'t' ], // meta map
file(params.test_data['mus_musculus']['genome']['deseq_results'], checkIfExists: true)
]

GPROFILER2_GOST (
contrasts,
input,
[],
[]
Expand All @@ -22,13 +21,12 @@ workflow test_gprofiler2_gost {
workflow test_gprofiler2_gost_backgroundmatrix {
contrasts = [ [ id:'test', reference:'r', target:'t' ], 'test', 'r', 't' ]
input = [
[ id:'test', reference:'r', target:'t' ], // meta map
[ id:'test_r_t', reference:'r', target:'t' ], // meta map
file(params.test_data['mus_musculus']['genome']['deseq_results'], checkIfExists: true)
]
ch_background = Channel.from(file(params.test_data['mus_musculus']['genome']['rnaseq_matrix'], checkIfExists: true))

GPROFILER2_GOST (
contrasts,
input,
[],
ch_background
Expand Down
32 changes: 16 additions & 16 deletions tests/modules/nf-core/gprofiler2/gost/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,25 @@
files:
- path: output/gprofiler2/R_sessionInfo.log
contains: ["ggplot2_3.4.3", "gprofiler2_0.2.2"]
- path: output/gprofiler2/gprofiler2.test_r_t.CORUM.sub_enriched_pathways.png
- path: output/gprofiler2/gprofiler2.test_r_t.CORUM.sub_enriched_pathways.tsv
- path: output/gprofiler2/test_r_t.gprofiler2.CORUM.sub_enriched_pathways.png
- path: output/gprofiler2/test_r_t.gprofiler2.CORUM.sub_enriched_pathways.tsv
md5sum: b0619e5e1424ac18f6cb52dac87248aa
- path: output/gprofiler2/gprofiler2.test_r_t.REAC.sub_enriched_pathways.png
- path: output/gprofiler2/gprofiler2.test_r_t.REAC.sub_enriched_pathways.tsv
- path: output/gprofiler2/test_r_t.gprofiler2.REAC.sub_enriched_pathways.png
- path: output/gprofiler2/test_r_t.gprofiler2.REAC.sub_enriched_pathways.tsv
md5sum: 0e4f2887e74e00fe8d0ad93771342f4b
- path: output/gprofiler2/gprofiler2.test_r_t.all_enriched_pathways.tsv
md5sum: 231bdf9e12394dbb37d9bd427da36fb5
- path: output/gprofiler2/gprofiler2.test_r_t.gostplot.html
- path: output/gprofiler2/test_r_t.gprofiler2.all_enriched_pathways.tsv
md5sum: bbfa48be9e30e9898ecf63709bb99331
- path: output/gprofiler2/test_r_t.gprofiler2.gostplot.html
contains:
[
"Neurotransmitter receptors and postsynaptic signal transmission",
"The phototransduction cascade",
"Muscle contraction",
]
- path: output/gprofiler2/gprofiler2.test_r_t.gostplot.png
- path: output/gprofiler2/test_r_t.gprofiler2.gostplot.png
- path: output/gprofiler2/gprofiler_full_mmusculus.CORUM_REAC.ENSG_filtered.gmt
md5sum: 455f9b94af175e78cc551cf7f79c3203
- path: output/gprofiler2/test.gprofiler2.gost_results.rds
- path: output/gprofiler2/test_r_t.gprofiler2.gost_results.rds
- path: output/gprofiler2/versions.yml

- name: gprofiler2 gost test_gprofiler2_gost_backgroundmatrix
Expand All @@ -35,13 +35,13 @@
files:
- path: output/gprofiler2/R_sessionInfo.log
contains: ["ggplot2_3.4.3", "gprofiler2_0.2.2"]
- path: output/gprofiler2/gprofiler2.test_r_t.KEGG.sub_enriched_pathways.png
- path: output/gprofiler2/gprofiler2.test_r_t.KEGG.sub_enriched_pathways.tsv
- path: output/gprofiler2/test_r_t.gprofiler2.KEGG.sub_enriched_pathways.png
- path: output/gprofiler2/test_r_t.gprofiler2.KEGG.sub_enriched_pathways.tsv
md5sum: fa7c0457da981ca00b209fadafe419bd
- path: output/gprofiler2/gprofiler2.test_r_t.all_enriched_pathways.tsv
md5sum: 6eeff6d8067f161425184039d68d5d1f
- path: output/gprofiler2/gprofiler2.test_r_t.gostplot.html # This plot is empty, will not add contains
- path: output/gprofiler2/gprofiler2.test_r_t.gostplot.png
- path: output/gprofiler2/test_r_t.gprofiler2.all_enriched_pathways.tsv
md5sum: fd611ba563ed8ec2c27dbf8d99b869dd
- path: output/gprofiler2/test_r_t.gprofiler2.gostplot.html # This plot is empty, will not add contains
- path: output/gprofiler2/test_r_t.gprofiler2.gostplot.png
- path: output/gprofiler2/gprofiler_full_mmusculus.KEGG.ENSG_filtered.gmt
- path: output/gprofiler2/test.gprofiler2.gost_results.rds
- path: output/gprofiler2/test_r_t.gprofiler2.gost_results.rds
- path: output/gprofiler2/versions.yml

0 comments on commit ef6053b

Please sign in to comment.