Skip to content

Commit

Permalink
Merge branch 'dev' into nf-core-template-merge-2.7.1
Browse files Browse the repository at this point in the history
  • Loading branch information
jfy133 committed Dec 9, 2022
2 parents 36f3b57 + 1687b43 commit 1ccd0a0
Show file tree
Hide file tree
Showing 130 changed files with 21,077 additions and 402 deletions.
138 changes: 134 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,27 @@ jobs:
if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
NXF_VER:
- "22.10.1"
- "latest-everything"
parameters:
- "--preprocessing_qc_tool falco"
- "--perform_longread_qc false"
- "--perform_shortread_qc false"
- "--shortread_qc_tool fastp"
- "--shortread_qc_tool fastp --shortread_qc_mergepairs --shortread_qc_includeunmerged"
- "--shortread_qc_tool fastp --shortread_qc_mergepairs"
- "--shortread_qc_tool adapterremoval"
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs --shortread_qc_includeunmerged"
- "--shortread_qc_tool adapterremoval --shortread_qc_mergepairs"
- "--shortread_complexityfilter_tool bbduk"
- "--shortread_complexityfilter_tool prinseqplusplus"
- "--perform_runmerging"
- "--perform_runmerging --shortread_qc_mergepairs"
- "--shortread_complexityfilter false --perform_shortread_hostremoval"

steps:
- name: Check out pipeline code
uses: actions/checkout@v3
Expand All @@ -35,9 +52,122 @@ jobs:
with:
version: "${{ matrix.NXF_VER }}"

- name: Show current locale
run: locale

- name: Set UTF-8 enabled locale
run: |
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Run pipeline with test data
uses: Wandalen/[email protected]
with:
command: nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results ${{ matrix.parameters }}
attempt_limit: 3

motus:
name: Test mOTUs with workflow parameters
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
runs-on: ubuntu-latest
strategy:
matrix:
NXF_VER:
- "21.10.3"
- "latest-everything"

steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Show current locale
run: locale

- name: Set UTF-8 enabled locale
run: |
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Prepare the database
run: |
wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
python downloadDB.py > download_db_log.txt
echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
echo 'motus,db_mOTU,,db_mOTU' >> 'database_motus.csv'
- name: Run pipeline with test data
uses: Wandalen/[email protected]
with:
command: nextflow run ${GITHUB_WORKSPACE} -profile test_motus,docker --outdir ./results --databases ./database_motus.csv
attempt_limit: 3

krakenuniq:
name: Test KrakenUniq with workflow parameters
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
runs-on: ubuntu-latest
strategy:
matrix:
NXF_VER:
- "21.10.3"
- "latest-everything"

steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Show current locale
run: locale

- name: Set UTF-8 enabled locale
run: |
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Run pipeline with test data
# TODO nf-core: You can customise CI pipeline run tests as required
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
uses: Wandalen/[email protected]
with:
command: nextflow run ${GITHUB_WORKSPACE} -profile test_krakenuniq,docker --outdir ./results
attempt_limit: 3

malt:
name: Test MALT with workflow parameters
if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/taxprofiler') }}
runs-on: ubuntu-latest
strategy:
matrix:
NXF_VER:
- "21.10.3"
- "latest-everything"

steps:
- name: Check out pipeline code
uses: actions/checkout@v2

- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
with:
version: "${{ matrix.NXF_VER }}"

- name: Show current locale
run: locale

- name: Set UTF-8 enabled locale
run: |
nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
sudo locale-gen en_US.UTF-8
sudo update-locale LANG=en_US.UTF-8
- name: Run pipeline with test data
uses: Wandalen/[email protected]
with:
command: nextflow run ${GITHUB_WORKSPACE} -profile test_nothing,docker --run_malt --outdir ./results
attempt_limit: 3
1 change: 1 addition & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ testing/
testing*
*.pyc
bin/
tests/
59 changes: 59 additions & 0 deletions CITATIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,67 @@
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)

- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)

> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
- [fastp](https://doi.org/10.1093/bioinformatics/bty560)

> Chen, Shifu, Yanqing Zhou, Yaru Chen, and Jia Gu. 2018. Fastp: An Ultra-Fast All-in-One FASTQ Preprocessor. Bioinformatics 34 (17): i884-90. 10.1093/bioinformatics/bty560.
- [AdapterRemoval2](https://doi.org/10.1186/s13104-016-1900-2)

> Schubert, Mikkel, Stinus Lindgreen, and Ludovic Orlando. 2016. AdapterRemoval v2: Rapid Adapter Trimming, Identification, and Read Merging. BMC Research Notes 9 (February): 88. doi:10.1186/s13104-016-1900-2.
- [Porechop](https://github.com/rrwick/Porechop)

- [BBTools](http://sourceforge.net/projects/bbmap/)

- [PRINSEQ++](https://doi.org/10.7287/peerj.preprints.27553v1)

> Cantu, Vito Adrian, Jeffrey Sadural, and Robert Edwards. 2019. PRINSEQ++, a Multi-Threaded Tool for Fast and Efficient Quality Control and Preprocessing of Sequencing Datasets. e27553v1. PeerJ Preprints. doi: 10.7287/peerj.preprints.27553v1.
- [Kraken2](https://doi.org/10.1186/s13059-019-1891-0)

> Wood, Derrick E., Jennifer Lu, and Ben Langmead. 2019. Improved Metagenomic Analysis with Kraken 2. Genome Biology 20 (1): 257. doi: 10.1186/s13059-019-1891-0.
- [KrakenUniq](https://doi.org/10.1186/s13059-018-1568-0)

> Breitwieser, Florian P., Daniel N. Baker, and Steven L. Salzberg. 2018. KrakenUniq: confident and fast metagenomics classification using unique k-mer counts. Genome Biology 19 (1): 198. doi: 10.1186/s13059-018-1568-0
- [Bracken](https://doi.org/10.7717/peerj-cs.104)

> Lu, J., Breitwieser, F. P., Thielen, P., & Salzberg, S. L. (2017). Bracken: Estimating species abundance in metagenomics data. PeerJ Computer Science, 3, e104. doi: 10.7717/peerj-cs.104
- [Krona](https://doi.org/10.1186/1471-2105-12-385)

> Ondov, Brian D., Nicholas H. Bergman, and Adam M. Phillippy. 2011. Interactive metagenomic visualization in a Web browser. BMC Bioinformatics 12 (1): 385. doi: 10.1186/1471-2105-12-385.
- [MALT](https://doi.org/10.1038/s41559-017-0446-6)

> Vågene, Åshild J., Alexander Herbig, Michael G. Campana, Nelly M. Robles García, Christina Warinner, Susanna Sabin, Maria A. Spyrou, et al. 2018. Salmonella Enterica Genomes from Victims of a Major Sixteenth-Century Epidemic in Mexico. Nature Ecology & Evolution 2 (3): 520-28. doi: 10.1038/s41559-017-0446-6.
- [MEGAN](https://doi.org/10.1371/journal.pcbi.1004957)

> Huson, Daniel H., Sina Beier, Isabell Flade, Anna Górska, Mohamed El-Hadidi, Suparna Mitra, Hans-Joachim Ruscheweyh, and Rewati Tappu. 2016. “MEGAN Community Edition - Interactive Exploration and Analysis of Large-Scale Microbiome Sequencing Data.” PLoS Computational Biology 12 (6): e1004957. doi: 10.1371/journal.pcbi.1004957.
- [MetaPhlAn3](https://doi.org/10.7554/eLife.65088)

> Beghini, Francesco, Lauren J McIver, Aitor Blanco-Míguez, Leonard Dubois, Francesco Asnicar, Sagun Maharjan, Ana Mailyan, et al. 2021. “Integrating Taxonomic, Functional, and Strain-Level Profiling of Diverse Microbial Communities with BioBakery 3.” Edited by Peter Turnbaugh, Eduardo Franco, and C Titus Brown. ELife 10 (May): e65088. doi: 10.7554/eLife.65088
- [Centrifuge](https://doi.org/10.1101/gr.210641.116)

> Kim, Daehwan, Li Song, Florian P. Breitwieser, and Steven L. Salzberg. 2016. “Centrifuge: Rapid and Sensitive Classification of Metagenomic Sequences.” Genome Research 26 (12): 1721-29. doi: 10.1101/gr.210641.116.
- [DIAMOND](https://doi.org/10.1038/nmeth.3176)

> Buchfink, Benjamin, Chao Xie, and Daniel H. Huson. 2015. “Fast and Sensitive Protein Alignment Using DIAMOND.” Nature Methods 12 (1): 59-60. doi: 10.1038/nmeth.3176.
- [FILTLONG](https://github.com/rrwick/Filtlong)

- [falco](https://doi.org/10.12688/f1000research.21142.2)

> de Sena Brandine G and Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research 2021, 8:1874
## Software packaging/containerisation tools

- [Anaconda](https://anaconda.com)
Expand Down
43 changes: 33 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_light.png#gh-light-mode-only) ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_dark.png#gh-dark-mode-only)
# ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_custom_light.png#gh-light-mode-only) ![nf-core/taxprofiler](docs/images/nf-core-taxprofiler_logo_custom_dark.png#gh-dark-mode-only)

[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/taxprofiler/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)

Expand All @@ -12,9 +12,11 @@

## Introduction

> ⚠️ This pipeline is still under development! While the pipeline is usable, not all functionality will be available!
<!-- TODO nf-core: Write a 1-2 sentence summary of what data the pipeline is for and what it does -->

**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for Taxonomic profiling of shotgun metagenomic data.
**nf-core/taxprofiler** is a bioinformatics best-practice analysis pipeline for taxonomic profiling of shotgun metagenomic data. It allows for in-parallel profiling with multiple profiling tools against multiple databases, produces standardised output tables.

The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!

Expand All @@ -26,12 +28,33 @@ On release, automated continuous integration tests run the pipeline on a full-si

<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->

1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
![](docs/images/taxprofiler_tube.png)

1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)
2. Performs optional read pre-processing
- Adapter clipping and merging (short-read: [fastp](https://github.com/OpenGene/fastp), [AdapterRemoval2](https://github.com/MikkelSchubert/adapterremoval); long-read: [porechop](https://github.com/rrwick/Porechop))
- Low complexity and quality filtering (short-read: [bbduk](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/), [PRINSEQ++](https://github.com/Adrian-Cantu/PRINSEQ-plus-plus); long-read: [Filtlong](https://github.com/rrwick/Filtlong))
- Host-read removal (short-read: [BowTie2](http://bowtie-bio.sourceforge.net/bowtie2/); long-read: [Minimap2](https://github.com/lh3/minimap2))
- Run merging
3. Supports statistics for host-read removal ([Samtools](http://www.htslib.org/))
4. Performs taxonomic profiling using one or more of:
- [Kraken2](https://ccb.jhu.edu/software/kraken2/)
- [MetaPhlAn3](https://huttenhower.sph.harvard.edu/metaphlan/)
- [MALT](https://uni-tuebingen.de/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/algorithms-in-bioinformatics/software/malt/)
- [DIAMOND](https://github.com/bbuchfink/diamond)
- [Centrifuge](https://ccb.jhu.edu/software/centrifuge/)
- [Kaiju](https://kaiju.binf.ku.dk/)
- [mOTUs](https://motu-tool.org/)
- [KrakenUniq](https://github.com/fbreitwieser/krakenuniq)
5. Perform optional post-processing with:
- [bracken](https://ccb.jhu.edu/software/bracken/)
6. Standardises output tables
7. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
8. Plotting Kraken2, Centrifuge, Kaiju and MALT results ([`Krona`](https://hpc.nih.gov/apps/kronatools.html))

## Quick Start

1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`)
1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`).

2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.

Expand All @@ -50,12 +73,12 @@ On release, automated continuous integration tests run the pipeline on a full-si
4. Start running your own analysis!

<!-- TODO nf-core: Update the example "typical command" below used to run the pipeline -->

```bash
nextflow run nf-core/taxprofiler --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
```console
nextflow run nf-core/taxprofiler --input samplesheet.csv --databases database.csv --outdir <OUTDIR> --run_<TOOL1> --run_<TOOL1> -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
```

Note pipeline supports both CSV and PEP input sample sheets. Find out more [here](http://pep.databio.org/en/2.1.0/specification/).

## Documentation

The nf-core/taxprofiler pipeline comes with documentation about the pipeline [usage](https://nf-co.re/taxprofiler/usage), [parameters](https://nf-co.re/taxprofiler/parameters) and [output](https://nf-co.re/taxprofiler/output).
Expand All @@ -66,7 +89,7 @@ nf-core/taxprofiler was originally written by nf-core community.

We thank the following people for their extensive assistance in the development of this pipeline:

<!-- TODO nf-core: If applicable, make list of people who have also contributed -->
[James A. Fellows Yates](https://github.com/jfy133), [Moritz Beber](https://github.com/Midnighter), [Lauri Mesilaakso](https://github.com/ljmesi), [Sofia Stamouli](https://github.com/sofsam), [Maxime Borry](https://github.com/maxibor),[Thomas A. Christensen II](https://github.com/MillironX), [Jianhong Ou](https://github.com/jianhong), [Rafal Stepien](https://github.com/rafalstepien), [Mahwash Jamy](https://github.com/mjamy).

## Contributions and Support

Expand Down
44 changes: 44 additions & 0 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,47 @@ report_section_order:
order: -1002

export_plots: true

custom_logo: "nf-core-taxprofiler_logo_custom_light.png"
custom_logo_url: https://nf-co.re/taxprofiler
custom_logo_title: "nf-core/taxprofiler"

run_modules:
- fastqc
- adapterRemoval
- fastp
- bowtie2
- samtools
- kraken
- malt
- custom_content

#extra_fn_clean_exts:
# - '_fastp'
# - '.pe.settings'
# - '.se.settings'

top_modules:
- "fastqc":
name: "FastQC (pre-Trimming)"
path_filters:
- "*raw_*fastqc.zip"
- "fastp"
- "adapterRemoval"
- "fastqc":
name: "FastQC (post-Trimming)"
path_filters:
- "*raw_*processed.zip"
- "kraken":
name: "Kraken"
path_filters:
- "*.kraken2.report.txt"
- "kraken":
name: "Centrifuge"
anchor: "centrifuge"
target: "Centrifuge"
doi: "10.1101/gr.210641.116"
info: "is a very rapid and memory-efficient system for the classification of DNA sequences from microbial samples. The system uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and the Ferragina-Manzini (FM) index. Note: Figure title"
extra: "Note: plot title will say Kraken2 due to Centrifuge producing the same output format as Kraken. If activated, see the actual Kraken2 results in the section above."
path_filters:
- "*.centrifuge.txt"
9 changes: 6 additions & 3 deletions assets/samplesheet.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
sample,fastq_1,fastq_2
SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
sample,run_accession,instrument_platform,fastq_1,fastq_2,fasta
2611,ERR5766174,ILLUMINA,,,/<path>/<to>/fasta/ERX5474930_ERR5766174_1.fa.gz
2612,ERR5766176,ILLUMINA,/<path>/<to>/fastq/ERX5474932_ERR5766176_1.fastq.gz,/<path>/<to>/fastq/ERX5474932_ERR5766176_2.fastq.gz,
2612,ERR5766180,ILLUMINA,/<path>/<to>/fastq/ERX5474936_ERR5766180_1.fastq.gz,,
2613,ERR5766181,ILLUMINA,/<path>/<to>/fastq/ERX5474937_ERR5766181_1.fastq.gz,/<path>/<to>/fastq/ERX5474937_ERR5766181_2.fastq.gz,
ERR3201952,ERR3201952,OXFORD_NANOPORE,/<path>/<to>/fastq/ERR3201952.fastq.gz,,
Loading

0 comments on commit 1ccd0a0

Please sign in to comment.