Merge pull request #464 from nf-core/fix-combine
Fix combine
Midnighter authored Apr 7, 2024
2 parents 593c013 + 399b87c commit a9afbc0
Showing 4 changed files with 55 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -64,7 +64,7 @@ jobs:
      run: |
        if [[ "${{ matrix.tags }}" == "test_motus" ]]; then
          wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
-         python downloadDB.py > download_db_log.txt
+         python downloadDB.py --no-download-progress
          echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
          echo "motus,db_mOTU,,db_mOTU" >> 'database_motus.csv'
          nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --databases ./database_motus.csv --outdir ./results_${{ matrix.tags }};
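
For reference, the two echo lines above produce the following two-line databases sheet, pointing the mOTUs test run at the freshly downloaded db_mOTU (reconstructed from the commands, not a file shipped in the repository):

    tool,db_name,db_params,db_path
    motus,db_mOTU,,db_mOTU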
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Fixed`

- [#336](https://github.com/nf-core/taxprofiler/issues/336) Replace samplesheet check with nf-validation for both sample and database input sheets (fix by @LilyAnderssonLee)
+- [#460](https://github.com/nf-core/taxprofiler/issues/460) corrected the channel transformations to combine Kaiju and mOTUs reports with their reference databases (fix by @Midnighter)

### `Dependencies`

2 changes: 1 addition & 1 deletion conf/modules.config
@@ -502,7 +502,7 @@ process {
}

    withName: KRAKENTOOLS_COMBINEKREPORTS_KRAKEN {
-        ext.prefix = { "kraken2_${meta.db_name}_combined_reports" }
+        ext.prefix = { "kraken2_${meta.id}_combined_reports" }
        publishDir = [
            path: { "${params.outdir}/kraken2/" },
            mode: params.publish_dir_mode,
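
This prefix tracks the helper introduced in the subworkflow below: after grouping, the database name travels under the meta key `id` instead of `db_name`, so the resolved file name is unchanged. A minimal Groovy sketch with a hypothetical database name:

    // Hypothetical meta map, shaped like the output of the new groupProfiles() helper.
    def meta = [id: 'db1']
    assert "kraken2_${meta.id}_combined_reports" == 'kraken2_db1_combined_reports'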
115 changes: 52 additions & 63 deletions subworkflows/local/standardisation_profiles.nf
@@ -15,25 +15,43 @@ include { GANON_TABLE
// Custom Functions

/**
- * Combine profiles with their original database, then separate into two channels.
+ * Group all profiles per reference database.
 *
- * The channel elements are assumed to be tuples one of [ meta, profile ], and the
- * database to be of [db_key, meta, database_file].
+ * @param ch_profiles A channel containing pairs of a meta map and the report of
+ *     a given profiler, where meta must contain a key `db_name`.
+ * @return A channel with one element per reference database. Each element is a
+ *     pair of a meta map with an `id` key and all corresponding profiles.
+ */
+def groupProfiles(ch_profiles, groupTupleOptions = [:]) {
+    return ch_profiles
+        .map { meta, profile -> [meta.db_name, profile] }
+        .groupTuple(groupTupleOptions)
+        .map { db_name, profiles -> [[id: db_name], profiles] }
+}
+
+/**
+ * Combine profiles with their corresponding reference database, then separate into two channels.
 *
- * @param ch_profile A channel containing a meta and the profilign report of a given profiler
- * @param ch_database A channel containing a key, the database meta, and the database file/folders itself
- * @return A multiMap'ed output channel with two sub channels, one with the profile and the other with the db
+ * The combined results are returned on multiple channels, where the element
+ * position for the profiles in one channel is the same as the position of the
+ * corresponding database element in the other channel.
+ *
+ * @param ch_profiles A channel containing pairs of a meta map with an `id` key
+ *     for a reference database, and all the corresponding profiling reports.
+ * @param ch_database A channel containing pairs of a database meta map and the
+ *     database itself.
+ * @return A multiMap'ed output channel with two sub channels, one with the
+ *     profiles (`profile`) and the other with the corresponding database (`db`).
 */
-def combineProfilesWithDatabase(ch_profile, ch_database) {
-
-    return ch_profile
-        .map { meta, profile -> [meta.db_name, meta, profile] }
-        .combine(ch_database, by: 0)
-        .multiMap {
-            key, meta, profile, db_meta, db ->
-                profile: [meta, profile]
-                db: db
-        }
+def combineProfilesWithDatabase(ch_profiles, ch_database) {
+    return ch_profiles
+        .map { meta, profile -> [meta.id, meta, profile] }
+        .combine(ch_database.map { db_meta, db -> [db_meta.db_name, db] }, by: 0)
+        .multiMap {
+            key, meta, profile, db ->
+                profile: [meta, profile]
+                db: db
+        }
}

workflow STANDARDISATION_PROFILES {
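
To make the refactor easier to follow, here is a minimal stand-alone sketch of how the two helpers compose; the channel contents and file names are hypothetical, not part of the commit:

    // Two reports profiled against the same hypothetical database 'db1'.
    ch_profiles = Channel.of(
        [[id: 'sample1', db_name: 'db1'], file('sample1.report.txt')],
        [[id: 'sample2', db_name: 'db1'], file('sample2.report.txt')]
    )
    ch_databases = Channel.of(
        [[db_name: 'db1'], file('db1', type: 'dir')]
    )

    // Emits one element: [[id: 'db1'], [sample1.report.txt, sample2.report.txt]]
    ch_grouped = groupProfiles(ch_profiles)

    // The profile and db sub-channels stay position-aligned, so a downstream
    // module can receive them as separate inputs for the same task.
    ch_combined = combineProfilesWithDatabase(ch_grouped, ch_databases)
    ch_combined.profile.view() // [[id: 'db1'], [sample1.report.txt, sample2.report.txt]]
    ch_combined.db.view()      // the matching db1 directory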
@@ -117,12 +135,7 @@ workflow STANDARDISATION_PROFILES {

// Bracken

-    ch_profiles_for_bracken = ch_input_profiles.bracken
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_bracken = groupProfiles(ch_input_profiles.bracken)

BRACKEN_COMBINEBRACKENOUTPUTS ( ch_profiles_for_bracken )

@@ -131,13 +144,10 @@
// Collect and replace id for db_name for prefix
// Have to sort by size to ensure first file actually has hits otherwise
// the script fails
-    ch_profiles_for_centrifuge = ch_input_profiles.centrifuge
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple(sort: {-it.size()} )
-        .map {
-            [[id:it[0]], it[1]]
-        }
-
+    ch_profiles_for_centrifuge = groupProfiles(
+        ch_input_profiles.centrifuge,
+        [sort: { -it.size() }]
+    )

KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE ( ch_profiles_for_centrifuge )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_CENTRIFUGE.out.txt )
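
The options map is forwarded verbatim to groupTuple, so the size-sort that the comment above describes now happens inside the helper. A hypothetical stand-alone illustration:

    ch_reports = Channel.of(
        [[db_name: 'db1'], file('small.report.txt')], // hypothetical, few hits
        [[db_name: 'db1'], file('large.report.txt')]  // hypothetical, many hits
    )
    // The closure ranks files within each group by descending size, so the
    // first file in the emitted list is the largest one.
    groupProfiles(ch_reports, [sort: { -it.size() }]).view()
    // [[id: 'db1'], [large.report.txt, small.report.txt]]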
@@ -146,12 +156,7 @@
// Kaiju

// Collect and replace id for db_name for prefix
-    ch_profiles_for_kaiju = ch_input_classifications.kaiju
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_kaiju = groupProfiles(ch_input_classifications.kaiju)

ch_input_for_kaiju2tablecombine = combineProfilesWithDatabase(ch_profiles_for_kaiju, ch_input_databases.kaiju)
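
The two sub-channels are then consumed positionally, pairing each group of reports with its database; a sketch of the consumption pattern, with a stand-in module name since the real call is collapsed above:

    // Hypothetical module call shape, not the actual code hidden behind the fold.
    SOME_COMBINE_MODULE (
        ch_input_for_kaiju2tablecombine.profile, // [meta, reports] per database
        ch_input_for_kaiju2tablecombine.db       // matching database, same order
    )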

@@ -164,29 +169,23 @@
// Collect and replace id for db_name for prefix
// Have to sort by size to ensure first file actually has hits otherwise
// the script fails
-    ch_profiles_for_kraken2 = ch_input_profiles.kraken2
-        .map {
-            meta, profiles ->
-                def new_meta = [:]
-                new_meta.tool = meta.tool == 'kraken2-bracken' ? 'kraken2' : meta.tool // replace to get the right output-format description
-                new_meta.id = meta.tool // append so to disambiguate when we have same databases for kraken2 step of bracken, with normal bracken
-                new_meta.db_name = meta.tool == 'kraken2-bracken' ? "${meta.db_name}-bracken" : "${meta.db_name}" // append so to disambiguate when we have same databases for kraken2 step of bracken, with normal bracken
-                [ new_meta, profiles ]
-        }
-        .groupTuple(sort: {-it.size()})
+    ch_profiles_for_kraken2 = groupProfiles(
+        ch_input_profiles.kraken2
+            .map { meta, profile ->
+                // Replace database name, to get the right output description.
+                def db_name = meta.tool == 'kraken2-bracken' ? "${meta.db_name}-bracken" : "${meta.db_name}"
+                return [meta + [db_name: db_name], profile]
+            },
+        [sort: { -it.size() }]
+    )

KRAKENTOOLS_COMBINEKREPORTS_KRAKEN ( ch_profiles_for_kraken2 )
ch_multiqc_files = ch_multiqc_files.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.txt )
ch_versions = ch_versions.mix( KRAKENTOOLS_COMBINEKREPORTS_KRAKEN.out.versions )
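
The renaming means a Kraken2 report generated for Bracken is grouped under a distinct database name, so it cannot collide with a plain Kraken2 run against the same database. A small sketch with hypothetical values:

    def meta = [id: 'sample1', tool: 'kraken2-bracken', db_name: 'db1']
    def db_name = meta.tool == 'kraken2-bracken' ? "${meta.db_name}-bracken" : "${meta.db_name}"
    // groupProfiles() then files this report under [id: 'db1-bracken'],
    // separate from plain Kraken2 reports grouped under [id: 'db1'].
    assert (meta + [db_name: db_name]).db_name == 'db1-bracken'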

// MetaPhlAn

-    ch_profiles_for_metaphlan = ch_input_profiles.metaphlan
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_metaphlan = groupProfiles(ch_input_profiles.metaphlan)

METAPHLAN_MERGEMETAPHLANTABLES ( ch_profiles_for_metaphlan )
ch_multiqc_files = ch_multiqc_files.mix( METAPHLAN_MERGEMETAPHLANTABLES.out.txt )
@@ -198,12 +197,7 @@
// Therefore removing db info here, and publish merged at root mOTUs results
// directory

-    ch_profiles_for_motus = ch_input_profiles.motus
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_motus = groupProfiles(ch_input_profiles.motus)

ch_input_for_motusmerge = combineProfilesWithDatabase(ch_profiles_for_motus, ch_input_databases.motus)

@@ -212,12 +206,7 @@

// Ganon

-    ch_profiles_for_ganon = ch_input_profiles.ganon
-        .map { [it[0]['db_name'], it[1]] }
-        .groupTuple()
-        .map {
-            [[id:it[0]], it[1]]
-        }
+    ch_profiles_for_ganon = groupProfiles(ch_input_profiles.ganon)

GANON_TABLE ( ch_profiles_for_ganon )
ch_multiqc_files = ch_multiqc_files.mix( GANON_TABLE.out.txt )
