From 517015b1a0f811297aed68b87fcd69be2ae2bed5 Mon Sep 17 00:00:00 2001 From: Thomas Tams Date: Mon, 28 Oct 2024 14:14:06 +0000 Subject: [PATCH 1/2] add: activation of gsea_run and supplying gene set file --- conf/test_experimental.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conf/test_experimental.config b/conf/test_experimental.config index 88506222..5de725e4 100644 --- a/conf/test_experimental.config +++ b/conf/test_experimental.config @@ -47,4 +47,9 @@ params { // Exploratory exploratory_main_variable = 'contrasts' + + // Activate GSEA + gsea_run = true + gene_sets_files = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt' + } From b093261fdcdf86c5e086a33e3081cbddebb5bde8 Mon Sep 17 00:00:00 2001 From: Thomas Tams Date: Mon, 28 Oct 2024 14:51:13 +0000 Subject: [PATCH 2/2] feat: added GSEA to ENRICHMENT subworkflow, feeding channels from differentialabundance down to enrichment subworkflow --- subworkflows/local/enrichment/main.nf | 62 +++++++++++++++++++++++++ subworkflows/local/experimental/main.nf | 10 +++- workflows/differentialabundance.nf | 5 +- 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/enrichment/main.nf b/subworkflows/local/enrichment/main.nf index 936ddfcc..f99bd24d 100644 --- a/subworkflows/local/enrichment/main.nf +++ b/subworkflows/local/enrichment/main.nf @@ -4,6 +4,11 @@ include { MYGENE } from "../../../modules/nf-core/mygene/main.nf" include { PROPR_GREA as GREA } from "../../../modules/local/propr/grea/main.nf" +include { GSEA_GSEA } from '../../../modules/nf-core/gsea/gsea/main.nf' +include { CUSTOM_TABULARTOGSEAGCT } from '../../../modules/nf-core/custom/tabulartogseagct/main.nf' +include { CUSTOM_TABULARTOGSEACLS } from '../../../modules/nf-core/custom/tabulartogseacls/main.nf' +include { TABULAR_TO_GSEA_CHIP } from '../../../modules/local/tabular_to_gsea_chip' + workflow ENRICHMENT { take: ch_tools 
// [ pathway_name, enrichment_map ] @@ -11,6 +16,11 @@ workflow ENRICHMENT { ch_results_genewise ch_results_genewise_filtered ch_adjacency + ch_contrasts + ch_samplesheet + ch_featuresheet + ch_gene_sets + ch_versions // TODO: add ch_gm when provided by user, etc. main: @@ -54,6 +64,58 @@ workflow ENRICHMENT { // todo: add gsea here + // For GSEA, we need to convert normalised counts to a GCT format for + // input, and process the sample sheet to generate class definitions + // (CLS) for the variable used in each contrast + + CUSTOM_TABULARTOGSEAGCT ( ch_counts ) + + // TODO: update CUSTOM_TABULARTOGSEACLS for value channel input per new + // guidelines (rather than meta usage employed here) + ch_contrasts_and_samples = ch_contrasts + .map{it[0]} // revert back to contrasts meta map + .combine( ch_samplesheet.map { it[1] } ) + + CUSTOM_TABULARTOGSEACLS(ch_contrasts_and_samples) + + TABULAR_TO_GSEA_CHIP( + ch_featuresheet.map{ it[1] }, + [params.features_id_col, params.features_name_col] + ) + + // The normalised matrix does not always have a contrast meta, so we + // need a combine rather than a join here + // Also add file name to metamap for easy access from modules.config + + ch_gsea_inputs = CUSTOM_TABULARTOGSEAGCT.out.gct + .map{ it.tail() } + .combine(CUSTOM_TABULARTOGSEACLS.out.cls) + .map{ tuple(it[1], it[0], it[2]) } + .combine(ch_gene_sets) + + GSEA_GSEA( + ch_gsea_inputs, + ch_gsea_inputs.map{ tuple(it[0].reference, it[0].target) }, // * + TABULAR_TO_GSEA_CHIP.out.chip.first() + ) + + // * Note: GSEA module currently uses a value channel for the mandatory + // non-file arguments used to define contrasts, hence the indicated + // usage of map to perform that transformation. 
An active subject of + // debate + GSEA_GSEA.out.report_tsvs_ref.view() + ch_gsea_results = GSEA_GSEA.out.report_tsvs_ref + .join(GSEA_GSEA.out.report_tsvs_target) + + ch_enriched = ch_enriched.combine(ch_gsea_results) + + + // Record GSEA versions + ch_versions = ch_versions + .mix(TABULAR_TO_GSEA_CHIP.out.versions) + .mix(GSEA_GSEA.out.versions) + + // ---------------------------------------------------- // Perform enrichment analysis with gprofiler2 // ---------------------------------------------------- diff --git a/subworkflows/local/experimental/main.nf b/subworkflows/local/experimental/main.nf index 5376b592..bf355210 100644 --- a/subworkflows/local/experimental/main.nf +++ b/subworkflows/local/experimental/main.nf @@ -9,8 +9,11 @@ workflow EXPERIMENTAL { take: ch_contrasts // [ meta, contrast_variable, reference, target ] ch_samplesheet // [ meta, samplesheet ] + ch_featuresheet // [ meta, featuresheet ] + ch_gene_sets ch_counts // [ meta, counts] ch_tools // [ pathway_name, differential_map, correlation_map, enrichment_map ] + ch_versions main: @@ -70,7 +73,12 @@ workflow EXPERIMENTAL { ch_counts, ch_results_genewise, ch_results_genewise_filtered, - ch_adjacency + ch_adjacency, + ch_contrasts, + ch_samplesheet, + ch_featuresheet, + ch_gene_sets, + ch_versions ) ch_enriched = ch_enriched.mix(ENRICHMENT.out.enriched) diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index 69b23d2c..3a79bd3c 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -387,8 +387,11 @@ workflow DIFFERENTIALABUNDANCE { EXPERIMENTAL( ch_contrasts, VALIDATOR.out.sample_meta, + VALIDATOR.out.feature_meta, + ch_gene_sets, CUSTOM_MATRIXFILTER.out.filtered, - ch_tools + ch_tools, + ch_versions ) // TODO for the moment, these channels are allocated to not breaking the next part.