diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index c354626..acce95b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -60,6 +60,8 @@ docker-run: NF_PROCESS_FILES: "subworkflows/kraken_pipeline.nf" NF_WORKFLOW_OPTS: "--fastq test_data/case01 --include_kraken2_assignments --abundance_threshold 1 -executor.\\$$local.memory 16GB --database_set ncbi_16s_18s" NF_IGNORE_PROCESSES: "" + AFTER_NEXTFLOW_CMD: > + grep "Found empty file for sample 'barcode05'" .nextflow.log # In wf-metagenomics, the wf runs indefinitely if there is no condition to stop it. # With the read limit we can stop the wf if the limit is reached. # It creates a STOP.fastq.gz that will be a new input in the wf and make it stop. diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b8502d..11c97b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] +### Fixed +- Files that are empty following the fastcat filtering are discarded from downstream analyses. + ## [v2.9.2] ### Fixed - "Can only use .dt accessor with datetimelike values" error in makeReport diff --git a/README.md b/README.md index 002d341..cd913cf 100644 --- a/README.md +++ b/README.md @@ -377,7 +377,7 @@ The report also includes the rarefaction curve per sample which displays the mea + See how to interpret some common nextflow exit codes [here](https://labs.epi2me.io/trouble-shooting/). + When using the Minimap2 pipeline with a custom database, you must make sure that the `ref2taxid` and reference files are coherent, as well as the taxonomy database. + If your device doesn't have the resources to use large Kraken2 databases (e.g. Standard-8, PlusPF-8 and PlusPFP-8), you can enable `kraken2_memory_mapping` to reduce the amount of memory required. -+ At this moment, the workflow does not support empty input files. 
Please make sure all the input files contain at least a few reads before starting the workflow and consider removing those empty file from the analysis. There can be empty barcode directories with no FASTQ files within them, but if the FASTQ file is there, it should contain reads. + diff --git a/docs/09_troubleshooting.md b/docs/09_troubleshooting.md index 57db8dc..e5a3d48 100644 --- a/docs/09_troubleshooting.md +++ b/docs/09_troubleshooting.md @@ -2,4 +2,3 @@ + See how to interpret some common nextflow exit codes [here](https://labs.epi2me.io/trouble-shooting/). + When using the Minimap2 pipeline with a custom database, you must make sure that the `ref2taxid` and reference files are coherent, as well as the taxonomy database. + If your device doesn't have the resources to use large Kraken2 databases (e.g. Standard-8, PlusPF-8 and PlusPFP-8), you can enable `kraken2_memory_mapping` to reduce the amount of memory required. -+ At this moment, the workflow does not support empty input files. Please make sure all the input files contain at least a few reads before starting the workflow and consider removing those empty file from the analysis. There can be empty barcode directories with no FASTQ files within them, but if the FASTQ file is there, it should contain reads. \ No newline at end of file diff --git a/main.nf b/main.nf index e97e588..3eafd32 100644 --- a/main.nf +++ b/main.nf @@ -115,7 +115,19 @@ workflow { ]) } - + // Discard empty samples + log.info( + "Note: Empty files or those files whose reads have been discarded after filtering based on " + + "read length and/or read quality will not appear in the report and will be excluded from subsequent analysis.") + samples = samples + | filter { meta, seqs, stats -> + // 'def' keeps the flag local to this closure instead of creating a script-level binding variable + def valid = meta['n_seqs'] > 0 + if (!valid) { + log.warn "Found empty file for sample '${meta["alias"]}'." 
+ } + valid + } // Call the proper pipeline if ("${params.classifier}" == "minimap2") { diff --git a/nextflow.config b/nextflow.config index 9edbf13..18a81eb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -120,7 +120,7 @@ params { ] agent = null container_sha = "sha44a6dacff5f2001d917b774647bb4cbc1b53bc76" - common_sha = "shaa0c37a1cad3357e2b5c6fa8b9ebc25ee9ee88879" + common_sha = "sha362c808b4f22ce66f940bef192a1316aec5f4c75" container_sha_amr = "sha2c763f19fac46035437854f1e2a5f05553542a78" } } diff --git a/test_data/case01/barcode05/empty.fastq b/test_data/case01/barcode05/empty.fastq new file mode 100644 index 0000000..e69de29