Merge pull request #159 from nf-core/dev

Version 3.0 Release Candidate
nf-core · Oct 27, 2022 · dcaa442 · dcaa442
2 parents 971984a + 994338e
commit dcaa442
Show file tree

Hide file tree

Showing 223 changed files with 4,619 additions and 2,912 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -8,7 +8,7 @@ trim_trailing_whitespace = true
 indent_size = 4
 indent_style = space
 
-[*.{md,yml,yaml,html,css,scss,js}]
+[*.{md,yml,yaml,html,css,scss,js,cff}]
 indent_size = 2
 
 # These files are edited and tested upstream in nf-core/modules

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -15,8 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/cuta
 
 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-  - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/cutandrun/tree/master/.github/CONTRIBUTING.md)
-  - [ ] If necessary, also make a PR on the nf-core/cutandrun _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/cutandrun/tree/master/.github/CONTRIBUTING.md)
+- [ ] If necessary, also make a PR on the nf-core/cutandrun _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.

diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
@@ -15,9 +15,6 @@ jobs:
     steps:
       - name: Launch workflow via tower
         uses: nf-core/tower-action@v3
-        # TODO nf-core: You can customise AWS full pipeline tests as required
-        # Add full size test data (but still relatively small datasets for few samples)
-        # on the `test_full.config` test runs with only one set of parameters
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
@@ -28,3 +25,7 @@ jobs:
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/cutandrun/results-${{ github.sha }}"
             }
           profiles: test_full,aws_tower
+      - uses: actions/upload-artifact@v3
+        with:
+          name: Tower debug log file
+          path: tower_action_*.log
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
@@ -23,3 +23,7 @@ jobs:
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/cutandrun/results-test-${{ github.sha }}"
             }
           profiles: test,aws_tower
+      - uses: actions/upload-artifact@v3
+        with:
+          name: Tower debug log file
+          path: tower_action_*.log
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -8,6 +8,11 @@ on:
   release:
     types: [published]
 
+# Cancel if a newer run is started
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
 jobs:
   ##############################
   ### SMALL INTEGRATION TEST ###
@@ -18,17 +23,12 @@ jobs:
     if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/cutandrun') }}"
     runs-on: ubuntu-latest
     env:
-      NXF_VER: ${{ matrix.nxf_ver }}
+      NXF_VER: ${{ matrix.NXF_VER }}
       NXF_ANSI_LOG: false
       CAPSULE_LOG: none
     strategy:
       matrix:
-        # Nextflow versions
-        include:
-          # Test pipeline minimum Nextflow version
-          - NXF_VER: "21.10.3"
-          # Test latest nextflow version
-          - NXF_VER: ""
+        NXF_VER: ["21.10.3", ""]
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
@@ -70,7 +70,7 @@ jobs:
     strategy:
       matrix:
         # Nextflow versions: check pipeline minimum and current latest
-        nxf_ver: ["21.10.3", ""]
+        NXF_VER: ["21.10.3", ""]
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
@@ -106,19 +106,16 @@ jobs:
     if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/cutandrun') }}
     runs-on: ubuntu-latest
     env:
-      NXF_VER: ""
-      NXF_EDGE: 1
       NXF_ANSI_LOG: false
       CAPSULE_LOG: none
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
 
       - name: Install Nextflow
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
-          nextflow self-update
+        uses: nf-core/setup-nextflow@v1
+        with:
+          version: "latest-everything"
 
       # Work around for the unexpected end of file error that github actions seems to get when downloading compressed
       # files during pipeline execution
@@ -141,34 +138,23 @@ jobs:
   ###      UNIT TESTS        ###
   ##############################
   unit_tests:
-    name: ${{ matrix.nxf_version }} ${{ matrix.tags }}
+    name: ${{ matrix.NXF_VER }} ${{ matrix.tags }}
     if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/cutandrun') }}
     runs-on: ubuntu-20.04
     env:
+      NXF_VER: ${{ matrix.NXF_VER }}
       NXF_ANSI_LOG: false
       CAPSULE_LOG: none
     strategy:
       fail-fast: false
       matrix:
-        nxf_version: ["21.10.3", ""]
+        NXF_VER: ["21.10.3", ""]
         tags:
-          - test_params
+          - test_genome_options
           - test_samplesheet
-          - verify_output_save_ref
-          - verify_output_only_input
-          - verify_output_save_merged
-          - verify_output_skip_fastqc
-          - verify_output_save_trimmed
-          - verify_output_skip_trimming
-          - verify_output_align_intermed
-          - verify_output_align_only_align
-          - verify_output_align_save_spikein_align
-          - verify_output_align_save_unaligned
-          - verify_output_only_filtering
-          - verify_output_align_duplicates_mark
-          - verify_output_align_duplicates_remove
-          - verify_output_align_duplicates_remove_target
-          - verify_output_peak_calling_only_peak_calling
+          - test_samplesheet_2
+          - test_filtering_noqfilter
+          - test_filtering_withqfilter
           - test_bam_scale_none
           - test_bam_scale_spikein
           - test_bam_scale_cpm
@@ -182,14 +168,37 @@ jobs:
           - test_peak_callers_macs2_seacr
           - test_peak_callers_seacr_macs2_noigg
           - test_peak_callers_ctrl_tests
-          - test_conseneus_peaks_group
-          - test_conseneus_peaks_all
-          - test_conseneus_peaks_invalid
+          - test_consensus_peaks_group
+          - test_consensus_peaks_all
+          - test_consensus_peaks_invalid
+          - verify_output_only_input
+          - verify_output_save_merged
+          - verify_output_save_trimmed
+          - verify_output_skip_trimming
+          - verify_output_skip_fastqc
+          - verify_output_save_ref
+          - verify_output_align_only_align
+          - verify_output_align_intermed
+          - verify_output_align_save_spikein_align
+          - verify_output_align_save_unaligned
+          - verify_output_align_duplicates_mark
+          - verify_output_align_duplicates_remove
+          - verify_output_align_duplicates_remove_target
+          - verify_output_only_filtering
+          - verify_output_peak_calling_only_peak_calling
+          - verify_output_reporting_skip_preseq_false
+          - verify_output_reporting_skip_preseq_true
+          - verify_output_reporting_skip_dtqc_false
+          - verify_output_reporting_skip_dtqc_true
+          - verify_output_reporting_skip_heatmaps_false
+          - verify_output_reporting_skip_heatmaps_true
+          - verify_output_reporting_skip_igv_false
+          - verify_output_reporting_skip_igv_true
+          - verify_output_reporting_skip_multiqc_false
+          - verify_output_reporting_skip_multiqc_true
+          - verify_output_reporting_skip_peak_qc_false
+          - verify_output_reporting_skip_peak_qc_true
           - verify_output_reporting_skip_reporting
-          - verify_output_reporting_skip_igv
-          - verify_output_reporting_skip_heatmaps
-          - verify_output_reporting_skip_multiqc
-          - verify_output_skip_frip
     steps:
       - name: Checkout Code
         uses: actions/checkout@v2
@@ -219,26 +228,30 @@ jobs:
             ${{ runner.os }}-nextflow-
 
       - name: Install Nextflow
-        env:
-          NXF_VER: ${{ matrix.nxf_version }}
-          CAPSULE_LOG: none
         run: |
           wget -qO- get.nextflow.io | bash
           sudo mv nextflow /usr/local/bin/
 
       - name: Run pytest-workflow
         uses: Wandalen/wretry.action@v1.0.11
         with:
-          command: pytest --tag ${{ matrix.tags }} --kwdof
+          command: TMPDIR=~ PROFILE=docker pytest --tag ${{ matrix.tags }} --symlink --kwdof --color=yes
           attempt_limit: 3
 
+      - name: Output log on failure
+        if: failure()
+        run: |
+          sudo apt install bat > /dev/null
+          batcat --decorations=always --color=always /home/runner/pytest_workflow_*/*/log.{out,err}
       - name: Upload logs on failure
         if: failure()
         uses: actions/upload-artifact@v2
         with:
-          name: logs-${{ matrix.tags }}-${{ matrix.profile }}-${{ matrix.nxf_version }}
+          name: logs-unit-tests
           path: |
-            /tmp/pytest_workflow_*/*/.nextflow.log
-            /tmp/pytest_workflow_*/*/log.out
-            /tmp/pytest_workflow_*/*/log.err
-            /tmp/pytest_workflow_*/*/work
+            /home/runner/pytest_workflow_*/*/.nextflow.log
+            /home/runner/pytest_workflow_*/*/log.out
+            /home/runner/pytest_workflow_*/*/log.err
+            /home/runner/pytest_workflow_*/*/work
+            !/home/runner/pytest_workflow_*/*/work/conda
+            !/home/runner/pytest_workflow_*/*/work/singularity
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -35,22 +35,48 @@ jobs:
       - name: Run Prettier --check
         run: prettier --check ${GITHUB_WORKSPACE}
 
+  PythonBlack:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Check code lints with Black
+        uses: psf/black@stable
+
+      # If the above check failed, post a comment on the PR explaining the failure
+      - name: Post PR comment
+        if: failure()
+        uses: mshick/add-pr-comment@v1
+        with:
+          message: |
+            ## Python linting (`black`) is failing
+
+            To keep the code consistent with lots of contributors, we run automated code consistency checks.
+            To fix this CI test, please run:
+
+            * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black`
+            * Fix formatting errors in your pipeline: `black .`
+
+            Once you push these changes the test should pass, and you can hide this comment :+1:
+
+            We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help!
+
+            Thanks again for your contribution!
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          allow-repeats: false
+
   nf-core:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
 
       - name: Install Nextflow
-        env:
-          CAPSULE_LOG: none
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+        uses: nf-core/setup-nextflow@v1
 
       - uses: actions/setup-python@v3
         with:
-          python-version: "3.6"
+          python-version: "3.7"
           architecture: "x64"
 
       - name: Install dependencies

diff --git a/.prettierignore b/.prettierignore
@@ -1,4 +1,5 @@
 email_template.html
+adaptivecard.json
 .nextflow*
 work/
 data/

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -103,3 +103,56 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi
 > **NB:** Dependency has been **updated** if both old and new version information is present.
 > **NB:** Dependency has been **added** if just the new version information is present.
 > **NB:** Dependency has been **removed** if version information isn't present.
+
+## [3.0] - 2022-09-26
+
+### Major Changes
+
+- Major rework of the pipeline internal flow structure. Metadata from processes (such as read counts) was previously annotated to a channel dictionary that was passed through the pipeline where various reporting processes could use the data. This was interacting with quite a few bugs in the Nextflow pipeline resume feature, causing lots of processes to rerun unnecessarily on resume. Any metadata generated in the pipeline is now written to files and passed where necessary to consuming reporting processes. This has drastically reduced the number of processes that incorrectly rerun on resume.
+
+- Re-organized the pipeline into clearer sections, breaking related processes into sub-workflows where possible. This is for better readability, but also to prepare the pipeline for the major upcoming nf-core feature of re-usable sub-workflows. As part of this rework, the pipeline now has distinct sections for fragment-based QC and peak-based QC.
+
+- All reporting has been moved into MultiQC where possible. All PDF-based charting has been removed. Other PDF reports such as heatmaps and upset plots are still generated.
+
+- We have listened to user comments that there is no guide on how to interpret the results from the pipeline. In response, we have revamped the documentation in the `output.md` document to describe the reporting in much more depth including good and bad examples of reporting output where possible.
+
+- [[#140](https://github.com/nf-core/cutandrun/issues/140)] - IGV browser output has been reworked. We first fixed the performance issues with long load times by including the genome index into the session folder. IGV output now includes peaks from all peak callers used in the pipeline, not just the primary one. Users can now select whether the gene track exported with the IGV session contains gene symbols or gene names. Several visual changes have been made to improve the default appearance and order of tracks.
+
+- Added PreSeq library complexity reporting.
+
+- Added full suite of fragment-based deepTools QC using the `multiBAMSummary` module. We generate three reports from this fragment dataset: PCA, correlation and fingerprint plots. This has replaced our previous python implementation of sample correlation calculation.
+
+- All coverage tracks generated from reads now extend reads to full fragment length by default. We feel this creates more realistic coverage tracks for CUT&RUN and improves the accuracy of other fragment-based reports.
+
+### Enhancements
+
+- Updated pipeline template to nf-core/tools `2.5.1`.
+- [[#149](https://github.com/nf-core/cutandrun/issues/149)] - Pipeline will now use a blacklist file if provided to create an include list for the genome.
+- The FRiP score is now calculated based on extended read fragments and not just mapped reads.
+- [[#138](https://github.com/nf-core/cutandrun/issues/138)] - Better sample sheet error reporting.
+- Gene bed files will now be automatically created from the GTF file if not supplied.
+- The default minimum q-score for read quality has been changed from 0 to 20.
+- [[#156](https://github.com/nf-core/cutandrun/issues/156)] SEACR has been better parameterized with dedicated config values for stringency and normalization. Credit to `CloXD` for this.
+- deepTools heatmap generation has been better parameterized with dedicated config values for the gene and peak region settings.
+- Consensus peak count reporting has been added to MultiQC.
+- Reviewed and updated CI tests for better code coverage.
+- Updated all nf-core modules to latest versions.
+
+### Fixes
+
+- Fixed some bugs in the passing of MACS2 peak data through the pipeline in v2.0. MACS2 peaks will now be correctly used and reported on in the pipeline.
+- [[#135](https://github.com/nf-core/cutandrun/issues/135)] - Removed many of the yellow warnings that were appearing in the pipeline to do with resource config options for processes that were not run.
+- [[#137](https://github.com/nf-core/cutandrun/issues/137)] - Fixed the `workflow.OnComplete` error.
+
+### Software dependencies
+
+Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference.
+
+| Dependency | Old version | New version |
+| ---------- | ----------- | ----------- |
+| `multiqc`  | 1.12        | 1.13        |
+| `picard`   | 2.27.2      | 2.27.4      |
+
+> **NB:** Dependency has been **updated** if both old and new version information is present.
+> **NB:** Dependency has been **added** if just the new version information is present.
+> **NB:** Dependency has been **removed** if version information isn't present.