Skip to content

Commit

Permalink
Merge branch 'master' into fresh-ci
Browse files Browse the repository at this point in the history
  • Loading branch information
maxulysse committed Oct 23, 2024
2 parents c141a14 + 82dcdac commit c984d91
Show file tree
Hide file tree
Showing 544 changed files with 13,011 additions and 6,281 deletions.
1,116 changes: 13 additions & 1,103 deletions .github/CODEOWNERS

Large diffs are not rendered by default.

54 changes: 54 additions & 0 deletions .github/scripts/wave_singularity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env -S uv run
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "httpx",
# ]
# ///
"""Resolve a Seqera Wave container URL to its Singularity (.sif) blob URL.

Calls the Wave ``/v1alpha1/inspect`` endpoint for a hard-coded community
image, verifies the manifest describes a single-layer Singularity image,
and prints the direct blob download URL. On failure, prints a diagnostic
message instead.
"""

import logging

import httpx

# Configure logging so logger.info/warning output is actually emitted;
# without this, only warnings and above reach stderr via the last-resort handler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

image_url = "oras://community.wave.seqera.io/library/pybedtools_bedtools_htslib_pip_pypints:aa20de1f1b5ddb30"

# The inspect endpoint expects the bare registry path without the ORAS scheme.
# removeprefix only strips a leading "oras://" (replace() would strip it anywhere).
image_url = image_url.removeprefix("oras://")

wave_api_url = "https://wave.seqera.io"
url = f"{wave_api_url}/v1alpha1/inspect"

# TODO: forward a Platform PAT (as a tower access token field in the request
# body) once authenticated access to the Wave back end is needed.
logger.warning("'platform_pat' not set, no auth to wave back end")

try:
    # Lazy %-style args avoid formatting when the log level is disabled.
    logger.info("calling image inspect at %s for image url %s", url, image_url)
    response = httpx.post(
        url=url,
        json={"containerImage": image_url},
        headers={"content-type": "application/json"},
    )
    # Fail fast on non-2xx responses instead of parsing an error payload
    # as if it were a manifest.
    response.raise_for_status()

    data = response.json()
    logger.debug(data)
    layers = data.get("container", {}).get("manifest", {}).get("layers", [])
    # A Singularity image is stored as exactly one layer whose mediaType ends in ".sif".
    is_singularity = len(layers) == 1 and layers[0].get("mediaType", "").endswith(".sif")
    if not is_singularity:
        print(layers)
        raise ValueError("not a singularity image")
    if "digest" not in layers[0]:
        print(layers)
        raise ValueError("no 'digest' in first layer found")

    # Blobs are addressed by bare sha256 hex digest, sharded by its first two hex chars.
    digest = layers[0]["digest"].replace("sha256:", "")
    container_url = f"https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/{digest[:2]}/{digest}/data"
    print(container_url)

except httpx.HTTPStatusError as exc:
    print(f"Error response {exc.response.status_code} while requesting {exc.request.url!r}.")
    print("No singularity image for you")
except httpx.RequestError as exc:
    print(f"An error occurred while requesting {exc.request.url!r}.")
    print("No singularity image for you")
2 changes: 1 addition & 1 deletion .github/workflows/fix-linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
# Use the @nf-core-bot token to check out so we can push later
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4
- uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4
with:
token: ${{ secrets.nf_core_bot_auth_token }}

Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
additional_dependencies:
- [email protected]
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.3
rev: 0.29.4
hooks:
- id: check-jsonschema
# match meta.ymls in one of the subdirectories of modules/nf-core
Expand All @@ -23,7 +23,7 @@ repos:
- id: renovate-config-validator
# use ruff for python files
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.6
rev: v0.7.0
hooks:
- id: ruff
files: \.py$
Expand Down
3 changes: 3 additions & 0 deletions modules/nf-core/arriba/arriba/environment.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
channels:
- conda-forge
- bioconda

dependencies:
- bioconda::arriba=2.4.0
17 changes: 7 additions & 10 deletions modules/nf-core/arriba/arriba/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@ process ARRIBA_ARRIBA {
'biocontainers/arriba:2.4.0--h0033a41_2' }"

input:
tuple val(meta), path(bam)
tuple val(meta), path(bam)
tuple val(meta2), path(fasta)
tuple val(meta3), path(gtf)
tuple val(meta4), path(blacklist)
tuple val(meta5), path(known_fusions)
tuple val(meta6), path(structural_variants)
tuple val(meta7), path(tags)
tuple val(meta8), path(protein_domains)
path(blacklist)
path(known_fusions)
path(cytobands)
path(protein_domains)

output:
tuple val(meta), path("*.fusions.tsv") , emit: fusions
Expand All @@ -30,8 +29,7 @@ process ARRIBA_ARRIBA {
def prefix = task.ext.prefix ?: "${meta.id}"
def blacklist = blacklist ? "-b $blacklist" : "-f blacklist"
def known_fusions = known_fusions ? "-k $known_fusions" : ""
def structural_variants = structural_variants ? "-d $structual_variants" : ""
def tags = tags ? "-t $tags" : ""
def cytobands = cytobands ? "-d $cytobands" : ""
def protein_domains = protein_domains ? "-p $protein_domains" : ""

"""
Expand All @@ -43,8 +41,7 @@ process ARRIBA_ARRIBA {
-O ${prefix}.fusions.discarded.tsv \\
$blacklist \\
$known_fusions \\
$structural_variants \\
$tags \\
$cytobands \\
$protein_domains \\
$args
Expand Down
39 changes: 5 additions & 34 deletions modules/nf-core/arriba/arriba/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,48 +43,19 @@ input:
type: file
description: Annotation GTF file
pattern: "*.{gtf}"
- - meta4:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- blacklist:
- - blacklist:
type: file
description: Blacklist file
pattern: "*.{tsv}"
- - meta5:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- known_fusions:
- - known_fusions:
type: file
description: Known fusions file
pattern: "*.{tsv}"
- - meta6:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- structural_variants:
type: file
description: Structural variants file
pattern: "*.{tsv}"
- - meta7:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- tags:
- - cytobands:
type: file
description: Tags file
description: Cytobands file
pattern: "*.{tsv}"
- - meta8:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'test' ]
- protein_domains:
- - protein_domains:
type: file
description: Protein domains file
pattern: "*.{gff3}"
Expand Down
107 changes: 107 additions & 0 deletions modules/nf-core/arriba/arriba/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@

nextflow_process {

name "Test Process ARRIBA_ARRIBA"
script "../main.nf"
process "ARRIBA_ARRIBA"

tag "modules"
tag "modules_nfcore"
tag "arriba"
tag "arriba/arriba"
tag "arriba/download"
tag "star/genomegenerate"
tag "star/align"

setup {
config "./nextflow.config"
options "-stub"
run("ARRIBA_DOWNLOAD") {
script "../../../arriba/download/main.nf"
process {
"""
input[0] = 'GRCh38'
"""
}
}
run("STAR_GENOMEGENERATE") {
script "../../../star/genomegenerate/main.nf"
options "-stub"
process {
"""
input[0] = Channel.of([
[ id:'test_fasta' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[1] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
"""
}
}
run("STAR_ALIGN") {
script "../../../star/align/main.nf"
options "-stub"
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
])
input[1] = STAR_GENOMEGENERATE.out.index
input[2] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = false
input[4] = 'illumina'
input[5] = false
"""
}
}
}

test("homo_sapiens - paired_end - stub") {

options "-stub"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.rna.paired_end.sorted.bam', checkIfExists: true) ]
])
input[1] = Channel.of([
[ id:'test_fasta' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[2] = Channel.of([
[ id:'test_gtf' ],
[ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = []
input[4] = []
input[5] = []
input[6] = []
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
55 changes: 55 additions & 0 deletions modules/nf-core/arriba/arriba/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"homo_sapiens - paired_end - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"1": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"2": [
"versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30"
],
"fusions": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"fusions_fail": [
[
{
"id": "test",
"single_end": false
},
"test.fusions.discarded.tsv:md5,f50b84b1db4b83ba62ec1deacc69c260"
]
],
"versions": [
"versions.yml:md5,d323796555db4a58fe4c6bc08d1dec30"
]
}
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-08T15:41:23.945072"
}
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
process {

publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

withName: STAR_GENOMEGENERATE {
ext.args = '--genomeSAindexNbases 11'
}

withName: STAR_ALIGN {
ext.args = '--readFilesCommand zcat --outSAMtype BAM Unsorted --outSAMunmapped Within --outBAMcompression 0 --outFilterMultimapNmax 50 --peOverlapNbasesMin 10 --alignSplicedMateMapLminOverLmate 0.5 --alignSJstitchMismatchNmax 5 -1 5 5 --chimSegmentMin 10 --chimOutType WithinBAM HardClip --chimJunctionOverhangMin 10 --chimScoreDropMax 30 --chimScoreJunctionNonGTAG 0 --chimScoreSeparation 1 --chimSegmentReadGapMax 3 --chimMultimapNmax 50'
}

}
11 changes: 7 additions & 4 deletions modules/nf-core/arriba/download/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@ process ARRIBA_DOWNLOAD {
'biocontainers/arriba:2.4.0--h0033a41_2' }"

input:
val(genome)

output:
path "*" , emit: reference
path "versions.yml" , emit: versions
path "blacklist*${genome}*.tsv.gz" , emit: blacklist
path "cytobands*${genome}*.tsv" , emit: cytobands
path "protein_domains*${genome}*.gff3" , emit: protein_domains
path "known_fusions*${genome}*.tsv.gz" , emit: known_fusions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
"""
wget https://github.com/suhrig/arriba/releases/download/v2.4.0/arriba_v2.4.0.tar.gz -O arriba_v2.4.0.tar.gz
wget https://github.com/suhrig/arriba/releases/download/v2.4.0/arriba_v2.4.0.tar.gz -O arriba_v2.4.0.tar.gz --no-check-certificate
tar -xzvf arriba_v2.4.0.tar.gz
rm arriba_v2.4.0.tar.gz
mv arriba_v2.4.0/database/* .
Expand All @@ -36,7 +40,6 @@ process ARRIBA_DOWNLOAD {
touch protein_domains_hg38_GRCh38_v2.4.0.gff3
touch cytobands_hg38_GRCh38_v2.4.0.tsv
touch known_fusions_hg38_GRCh38_v2.4.0.tsv.gz
touch protein_domains_hg38_GRCh38_v2.4.0.gff3
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
Loading

0 comments on commit c984d91

Please sign in to comment.