diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 44bb4c5f..e6d5d4df 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -64,7 +64,7 @@ jobs:
         run: |
           if [[ "${{ matrix.tags }}" == "test_motus" ]]; then
             wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
-            python downloadDB.py > download_db_log.txt
+            python downloadDB.py --no-download-progress
             echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
             echo "motus,db_mOTU,,db_mOTU" >> 'database_motus.csv'
             nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --databases ./database_motus.csv --outdir ./results_${{ matrix.tags }};
diff --git a/CHANGELOG.md b/CHANGELOG.md
index df1449e1..bbebe94c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### `Fixed`

 - [#336](https://github.com/nf-core/taxprofiler/issues/336) Replace samplesheet check with nf-validation for both sample and database input sheets (fix by @LilyAnderssonLee)
+- [#460](https://github.com/nf-core/taxprofiler/issues/460) corrected the channel transformations to combine Kaiju and mOTUs reports with their reference databases (fix by @Midnighter)

 ### `Dependencies`
diff --git a/conf/modules.config b/conf/modules.config
index f5a5e631..1956605e 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -502,7 +502,7 @@ process {
     }

     withName: KRAKENTOOLS_COMBINEKREPORTS_KRAKEN {
-        ext.prefix = { "kraken2_${meta.db_name}_combined_reports" }
+        ext.prefix = { "kraken2_${meta.id}_combined_reports" }
         publishDir = [
             path: { "${params.outdir}/kraken2/" },
             mode: params.publish_dir_mode,
diff --git a/subworkflows/local/standardisation_profiles.nf b/subworkflows/local/standardisation_profiles.nf
index 4592e9de..95cd9d3f 100644
--- a/subworkflows/local/standardisation_profiles.nf
+++ b/subworkflows/local/standardisation_profiles.nf
@@ -15,25 +15,43 @@ include { GANON_TABLE

 // Custom Functions

 /**
-* Combine profiles with their original database, then separate into two channels.
+* Group all profiles per reference database.
 *
-* The channel elements are assumed to be tuples one of [ meta, profile ], and the
-* database to be of [db_key, meta, database_file].
+* @param ch_profiles A channel containing pairs of a meta map and the report of
+* a given profiler, where meta must contain a key `db_name`.
+* @return A channel with one element per reference database. Each element is a
+* pair of a meta map with an `id` key and all corresponding profiles.
+*/
+def groupProfiles(ch_profiles, groupTupleOptions = [:]) {
+    return ch_profiles
+        .map { meta, profile -> [meta.db_name, profile] }
+        .groupTuple(groupTupleOptions)
+        .map { db_name, profiles -> [[id: db_name], profiles] }
+}
+
+/**
+* Combine profiles with their corresponding reference database, then separate into two channels.
 *
-* @param ch_profile A channel containing a meta and the profilign report of a given profiler
-* @param ch_database A channel containing a key, the database meta, and the database file/folders itself
-* @return A multiMap'ed output channel with two sub channels, one with the profile and the other with the db
+* The combined results are returned on multiple channels, where the element
+* position for the profiles in one channel is the same as the position of the
+* corresponding database element in the other channel.
+*
+* @param ch_profiles A channel containing pairs of a meta map with an `id` key
+* for a reference database, and all the corresponding profiling reports.
+* @param ch_database A channel containing pairs of a database meta map and the
+* database itself.
+* @return A multiMap'ed output channel with two sub channels, one with the
+* profiles (`profile`) and the other with the corresponding database (`db`).
 */
-def combineProfilesWithDatabase(ch_profile, ch_database) {
-
-return ch_profile
-    .map { meta, profile -> [meta.db_name, meta, profile] }
-    .combine(ch_database, by: 0)
-    .multiMap {
-        key, meta, profile, db_meta, db ->
-            profile: [meta, profile]
-            db: db
-    }
+def combineProfilesWithDatabase(ch_profiles, ch_database) {
+    return ch_profiles
+        .map { meta, profile -> [meta.id, meta, profile] }
+        .combine(ch_database.map { db_meta, db -> [db_meta.db_name, db] }, by: 0)
+        .multiMap {
+            key, meta, profile, db ->
+                profile: [meta, profile]
+                db: db
+        }
 }

@@ -117,12 +135,7 @@ workflow STANDARDISATION_PROFILES {

     // Bracken

-    ch_profiles_for_bracken = ch_input_profiles.bracken
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_bracken = groupProfiles(ch_input_profiles.bracken)

     BRACKEN_COMBINEBRACKENOUTPUTS ( ch_profiles_for_bracken )
@@ -131,13 +144,10 @@ workflow STANDARDISATION_PROFILES {
     // Collect and replace id for db_name for prefix
     // Have to sort by size to ensure first file actually has hits otherwise
     // the script fails
-    ch_profiles_for_centrifuge = ch_input_profiles.centrifuge
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple(sort: {-it.size()} )
-        .map {
-            [[id:it[0]], it[1]]
-        }
-
+    ch_profiles_for_centrifuge = groupProfiles(
+        ch_input_profiles.centrifuge,
+        [sort: { -it.size() }]
+    )
     KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge )
     ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
@@ -146,12 +156,7 @@ workflow STANDARDISATION_PROFILES {

     // Kaiju

     // Collect and replace id for db_name for prefix
-    ch_profiles_for_kaiju = ch_input_classifications.kaiju
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_kaiju = groupProfiles(ch_input_classifications.kaiju)

     ch_input_for_kaiju2tablecombine = combineProfilesWithDatabase(ch_profiles_for_kaiju, ch_input_databases.kaiju)
@@ -164,16 +169,15 @@ workflow STANDARDISATION_PROFILES {
     // Collect and replace id for db_name for prefix
     // Have to sort by size to ensure first file actually has hits otherwise
     // the script fails
-    ch_profiles_for_kraken2 = ch_input_profiles.kraken2
-        .map {
-            meta, profiles ->
-                def new_meta = [:]
-                new_meta.tool = meta.tool == 'kraken2-bracken' ? 'kraken2' : meta.tool // replace to get the right output-format description
-                new_meta.id = meta.tool // append so to disambiguate when we have same databases for kraken2 step of bracken, with normal bracken
-                new_meta.db_name = meta.tool == 'kraken2-bracken' ? "${meta.db_name}-bracken" : "${meta.db_name}" // append so to disambiguate when we have same databases for kraken2 step of bracken, with normal bracken
-                [ new_meta, profiles ]
-        }
-        .groupTuple(sort: {-it.size()})
+    ch_profiles_for_kraken2 = groupProfiles(
+        ch_input_profiles.kraken2
+            .map { meta, profile ->
+                // Replace database name, to get the right output description.
+                def db_name = meta.tool == 'kraken2-bracken' ? "${meta.db_name}-bracken" : "${meta.db_name}"
+                return [meta + [db_name: db_name], profile]
+            },
+        [sort: { -it.size() }]
+    )

     KRAKENTOOLS_COMBINEKREPORTS_KRAKEN ( ch_profiles_for_kraken2 )
     ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt )
@@ -181,12 +185,7 @@ workflow STANDARDISATION_PROFILES {

     // MetaPhlAn

-    ch_profiles_for_metaphlan = ch_input_profiles.metaphlan
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_metaphlan = groupProfiles(ch_input_profiles.metaphlan)

     METAPHLAN_MERGEMETAPHLANTABLES ( ch_profiles_for_metaphlan )
     ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN_MERGEMETAPHLANTABLES.out.txt )
@@ -198,12 +197,7 @@ workflow STANDARDISATION_PROFILES {
     // Therefore removing db info here, and publish merged at root mOTUs results
     // directory

-    ch_profiles_for_motus = ch_input_profiles.motus
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_motus = groupProfiles(ch_input_profiles.motus)

     ch_input_for_motusmerge = combineProfilesWithDatabase(ch_profiles_for_motus, ch_input_databases.motus)
@@ -212,12 +206,7 @@ workflow STANDARDISATION_PROFILES {

     // Ganon

-    ch_profiles_for_ganon = ch_input_profiles.ganon
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_ganon = groupProfiles(ch_input_profiles.ganon)

     GANON_TABLE ( ch_profiles_for_ganon )
     ch_multiqc_files = ch_multiqc_files.mix( GANON_TABLE.out.txt )