Skip to content

Commit

Permalink
Merge branch 'dsl2' into quarto
Browse files Browse the repository at this point in the history
  • Loading branch information
MeriamOs authored Dec 3, 2024
2 parents 639f43d + 40c36f5 commit d090c73
Show file tree
Hide file tree
Showing 11 changed files with 444 additions and 8 deletions.
29 changes: 29 additions & 0 deletions bin/create_acc2tax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python

import argparse
import pysam
from pathlib import Path


def parse_args():
    """Parse the command-line arguments of the acc2tax creation script.

    Returns:
        argparse.Namespace: ``genome`` (a ``pathlib.Path`` built from the
        positional argument) and ``taxid`` (an ``int`` read from ``-t``).
    """
    # The text is given as ``description``: the first positional parameter of
    # ``ArgumentParser`` is ``prog``, which would replace the program name in
    # usage and error messages.
    parser = argparse.ArgumentParser(description="Create acc2tax file")
    parser.add_argument("genome", type=Path, help="Path to genome file")
    # ``-t`` is mandatory: when omitted the taxid would be ``None`` and end up
    # verbatim in the output file name and in every written row.
    parser.add_argument(
        "-t", type=int, dest="taxid", required=True, help="taxid"
    )

    return parser.parse_args()


def acc2tax(genome, taxid):
    """Write ``<taxid>.accession2taxid`` for every record of a genome file.

    Each record name read through ``pysam.FastxFile`` yields one
    tab-separated row: the name truncated at its first ``"."``, the full
    name, and ``taxid``. Records sharing a name collapse into a single row.

    Args:
        genome: Path of the sequence file handed to ``pysam.FastxFile``.
        taxid: Taxonomy identifier used for the file name and the last column.
    """
    # Read all records first so the output file is only created once the
    # input has been parsed.
    with pysam.FastxFile(genome) as fastx:
        accession_by_name = {
            record.name: record.name.split(".")[0] for record in fastx
        }
    with open(f"{taxid}.accession2taxid", "w") as out:
        out.write("accession\taccession.version\ttaxid\n")
        out.writelines(
            f"{accession}\t{name}\t{taxid}\n"
            for name, accession in accession_by_name.items()
        )


if __name__ == "__main__":
    # Script entry point: read the CLI options, then write the table.
    cli = parse_args()
    acc2tax(genome=cli.genome, taxid=cli.taxid)
36 changes: 36 additions & 0 deletions bin/sam2lca_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python


import os
import json
import argparse
from pathlib import Path


def parse_args():
    """Parse the command-line arguments of the sam2lca JSON script.

    Returns:
        argparse.Namespace: ``acc2taxid`` and ``md5``, both converted to
        ``pathlib.Path`` from the two positional arguments.
    """
    # The text is given as ``description``: the first positional parameter of
    # ``ArgumentParser`` is ``prog``, which would replace the program name in
    # usage and error messages.
    parser = argparse.ArgumentParser(description="Create sam2lca json file")
    parser.add_argument(
        "acc2taxid", type=Path, help="Path to accession2taxid gzip compressed file"
    )
    parser.add_argument(
        "md5",
        type=Path,
        help="Path to accession2taxid gzip compressed md5 checksum file",
    )

    return parser.parse_args()


def write_json(acc2taxid, md5, db_name="adnamap"):
    """Write the JSON descriptor ``<db_name>.sam2lca.json``.

    The file is created in the current working directory and holds three
    mappings keyed by ``db_name``: ``mapfiles`` (list with the
    accession2taxid path), ``mapmd5`` (list with the checksum path) and
    ``map_db`` (the string ``"<db_name>.db"``).

    Args:
        acc2taxid: Path to the accession2taxid file, as ``str`` or
            ``os.PathLike``.
        md5: Path to the matching md5 checksum file, as ``str`` or
            ``os.PathLike``.
        db_name: Name used as key in the JSON and as output file prefix.
    """
    # Normalising through ``Path`` lets callers pass plain strings as well as
    # ``Path`` objects; paths are stored with forward slashes.
    sam2lca_dict = {
        "mapfiles": {db_name: [Path(acc2taxid).as_posix()]},
        "mapmd5": {db_name: [Path(md5).as_posix()]},
        "map_db": {db_name: f"{db_name}.db"},
    }
    with open(f"{db_name}.sam2lca.json", "w", encoding="utf-8") as fh:
        json.dump(sam2lca_dict, fh)


if __name__ == "__main__":
    # Script entry point: read the two paths, then write the JSON descriptor.
    cli = parse_args()
    write_json(acc2taxid=cli.acc2taxid, md5=cli.md5)
22 changes: 22 additions & 0 deletions modules/local/create_acc2tax.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Runs create_acc2tax.py on one genome FASTA and emits the resulting
// *.accession2taxid table.
process CREATE_ACC2TAX {
    tag "${meta.genome_name}"
    label 'process_single'

    // NOTE(review): the sam2lca environment is presumably reused because it
    // provides pysam, which create_acc2tax.py imports -- confirm.
    conda (params.enable_conda ? "bioconda::sam2lca=1.1.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sam2lca:1.1.4--pyhdfd78af_0' :
        'quay.io/biocontainers/sam2lca:1.1.4--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(fasta) // meta.genome_name and meta.taxid are read by this process

    output:
    path("*.accession2taxid"), emit: acc2tax

    script:
    // The script takes only the genome and the taxid, so no task.ext.args
    // local is declared here.
    """
    create_acc2tax.py $fasta -t ${meta.taxid}
    """
}
25 changes: 25 additions & 0 deletions modules/local/sam2lca/prep_db/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Compresses an accession2taxid table, computes its md5 checksum and writes
// the JSON descriptor produced by sam2lca_json.py.
process SAM2LCA_PREPDB {
    label 'process_single'

    // Plain version pin: "1.1.4--pyhdfd78af_0" is the container tag format,
    // not a valid conda match spec.
    conda (params.enable_conda ? "bioconda::sam2lca=1.1.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sam2lca:1.1.4--pyhdfd78af_0' :
        'quay.io/biocontainers/sam2lca:1.1.4--pyhdfd78af_0' }"

    input:
    path(acc2tax)

    output:
    path("*.md5") , emit: acc2tax_md5
    path("*.json"), emit: acc2tax_json
    path("*.gz")  , emit: acc2tax_gz

    script:
    // gzip -c reads through the staged input (a symlink by default, which
    // GNU gzip refuses to compress in place) and writes a regular .gz file
    // into the task directory.
    """
    gzip -c $acc2tax > ${acc2tax}.gz
    md5sum ${acc2tax}.gz > ${acc2tax}.gz.md5
    sam2lca_json.py ${acc2tax}.gz ${acc2tax}.gz.md5
    """
}
7 changes: 7 additions & 0 deletions modules/local/sam2lca/updatedb/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the sam2lca/updatedb module.
channels:
  - conda-forge
  - bioconda
dependencies:
  - bioconda::sam2lca=1.1.4
62 changes: 62 additions & 0 deletions modules/local/sam2lca/updatedb/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Runs `sam2lca update-db` into the directory sam2lca_db and records the
// sam2lca version in versions.yml.
process SAM2LCA_UPDATEDB {
    tag "${acc2tax_name}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sam2lca:1.1.4--pyhdfd78af_0':
        'biocontainers/sam2lca:1.1.4--pyhdfd78af_0' }"

    input:
    val(acc2tax_name)
    val(taxo_db_name)
    path(taxo_nodes)   // nodes.dmp
    path(taxo_names)   // names.dmp
    path(taxo_merged)  // merged.dmp
    path(acc2tax_json) // optional
    path(acc2tax)      // acc2tax.gz, staged only (not referenced in the command)
    path(acc2tax_md5)  // acc2tax.gz.md5, staged only (not referenced in the command)

    output:
    path "sam2lca_db"  , emit: sam2lca_db
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Each optional file contributes its flag only when a value was supplied.
    def extra_args = task.ext.args ?: ''
    def nodes_opt  = taxo_nodes   ? "--taxo_nodes ${taxo_nodes}"     : ''
    def names_opt  = taxo_names   ? "--taxo_names ${taxo_names}"     : ''
    def merged_opt = taxo_merged  ? "--taxo_merged ${taxo_merged}"   : ''
    def json_opt   = acc2tax_json ? "--acc2tax_json ${acc2tax_json}" : ''
    """
    mkdir -p sam2lca_db
    sam2lca -d sam2lca_db \\
        update-db \\
        -t ${taxo_db_name} \\
        ${names_opt} \\
        ${nodes_opt} \\
        ${merged_opt} \\
        -a ${acc2tax_name} \\
        ${json_opt} \\
        ${extra_args}
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sam2lca: \$(echo \$(sam2lca --version 2>&1) | sed 's/^sam2lca, version //' )
    END_VERSIONS
    """

    stub:
    // Creates an empty placeholder database and the same versions.yml.
    """
    mkdir -p sam2lca_db
    touch sam2lca_db/test.pkl
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sam2lca: \$(echo \$(sam2lca --version 2>&1) | sed 's/^sam2lca, version //' )
    END_VERSIONS
    """
}
80 changes: 80 additions & 0 deletions modules/local/sam2lca/updatedb/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Module metadata for SAM2LCA_UPDATEDB; inputs are listed in the same order
# as the process input channels.
name: "sam2lca_updatedb"
description: Build sam2lca database for calling lowest common ancestors from multi-mapped reads in SAM/BAM/CRAM
  files
keywords:
  - LCA
  - alignment
  - bam
  - metagenomics
  - Ancestor
  - multimapper
  - build
  - database
tools:
  - "sam2lca":
      description: "Lowest Common Ancestor on SAM/BAM/CRAM alignment files"
      homepage: "https://github.com/maxibor/sam2lca"
      documentation: "https://sam2lca.readthedocs.io"
      doi: "10.21105/joss.04360"
      licence: ["GPL v3"]
      identifier: ""

input:
  - - acc2tax_name:
        type: string
        description: Name of accession2taxid type to use
  - - taxo_db_name:
        type: string
        description: Name of taxonomy database type to use
  - - taxo_nodes:
        type: file
        description: "NCBI taxonomy nodes file"
        pattern: "*.dmp"
        ontologies:
          - edam: http://edamontology.org/format_2330
  - - taxo_names:
        type: file
        description: NCBI taxonomy names file
        pattern: "*.dmp"
        ontologies:
          - edam: http://edamontology.org/format_2330
  - - taxo_merged:
        type: file
        description: NCBI taxonomy merged file
        pattern: "*.dmp"
        ontologies:
          - edam: http://edamontology.org/format_2330
  - - acc2tax_json:
        type: file
        description: JSON file listing accession2taxid mapping files. Only required if using a custom database
        pattern: "*.json"
        ontologies:
          - edam: "http://edamontology.org/format_3464"
  # Declared as a file: the process takes it as a path input.
  - - acc2tax:
        type: file
        description: accession2taxid mapping file compressed with gzip. Only required if using a custom database
        pattern: "*.gz"
        ontologies:
          - edam: http://edamontology.org/format_3989
  - - acc2tax_md5:
        type: file
        description: MD5 checksum of the accession2taxid mapping file. Only required if using a custom database
        pattern: "*.md5"
        ontologies:
          - edam: http://edamontology.org/format_2330

output:
  - sam2lca_db:
      - sam2lca_db:
          type: directory
          description: "sam2lca database"
  - versions:
      - "versions.yml":
          type: file
          description: File containing software versions
          pattern: "versions.yml"

authors:
  - "@maxibor"
maintainers:
  - "@maxibor"
35 changes: 35 additions & 0 deletions modules/local/sam2lca/updatedb/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// nf-test suite for SAM2LCA_UPDATEDB: one real run and one stub run, both
// driven with the 'test' database names and no input files.
nextflow_process {

    name "Test Process SAM2LCA_UPDATEDB"
    script "../main.nf"
    process "SAM2LCA_UPDATEDB"

    tag "modules"
    tag "modules_nfcore"
    tag "sam2lca"
    tag "sam2lca/updatedb"

    test("test-sam2lca-updatedb - test dataset") {
        when {
            process {
                """
                input[0] = 'test'
                input[1] = 'test'
                input[2] = []
                input[3] = []
                input[4] = []
                input[5] = []
                input[6] = []
                input[7] = []
                """
            }
        }

        then {
            // Only success is asserted for the real run.
            // NOTE(review): presumably the generated database files are not
            // byte-stable enough to snapshot -- confirm.
            assertAll(
                { assert process.success }
            )
        }
    }

    // Exercises the stub block; the test name matches the existing
    // "sam2lca-updatedb - stub" entry of main.nf.test.snap.
    test("sam2lca-updatedb - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = 'test'
                input[1] = 'test'
                input[2] = []
                input[3] = []
                input[4] = []
                input[5] = []
                input[6] = []
                input[7] = []
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

}
92 changes: 92 additions & 0 deletions modules/local/sam2lca/updatedb/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{
"test-sam2lca-updatedb - test taxonomy": {
"content": [
{
"0": [
[
"merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
"merged.dmp.gz:md5,d41d8cd98f00b204e9800998ecf8427e",
"merged.dmp.gz.md5:md5,f6e01130c21a58a3371eddec53a18f6f",
"names.dmp:md5,e7994ec89470481e031b3ecef616e778",
"names.dmp.gz:md5,e7994ec89470481e031b3ecef616e778",
"names.dmp.gz.md5:md5,ce7546bbac7dcbe5c0054975538d7fb7",
"nodes.dmp:md5,9e934f98f3c2ace17fa3d77eb235f96f",
"nodes.dmp.gz:md5,9e934f98f3c2ace17fa3d77eb235f96f",
"nodes.dmp.gz.md5:md5,a4597c31842067abe62dda359f8bd854",
[
"000005.log:md5,9167e183e1dc6070dbfc81c4674d9654",
"CURRENT:md5,6752a1d65b201c13b62ea44016eb221f",
"IDENTITY:md5,730568fb30c4bd8dc6db1b95d288b9dc",
"LOCK:md5,d41d8cd98f00b204e9800998ecf8427e",
"LOG:md5,a6e02bcd897ccd9a28a40cf3dd350292",
"MANIFEST-000004:md5,17211b4c15bffd11c80dfbd5b7db77ef",
"OPTIONS-000007:md5,22187b853e27d095e1eac121574b1d95"
],
"test.pkl:md5,d9f5e1d08d7b678281ac088cdca355c9"
]
],
"1": [
"versions.yml:md5,175fb2cc18a30f7ab660efe1a42b7161"
],
"sam2lca_db": [
[
"merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e",
"merged.dmp.gz:md5,d41d8cd98f00b204e9800998ecf8427e",
"merged.dmp.gz.md5:md5,f6e01130c21a58a3371eddec53a18f6f",
"names.dmp:md5,e7994ec89470481e031b3ecef616e778",
"names.dmp.gz:md5,e7994ec89470481e031b3ecef616e778",
"names.dmp.gz.md5:md5,ce7546bbac7dcbe5c0054975538d7fb7",
"nodes.dmp:md5,9e934f98f3c2ace17fa3d77eb235f96f",
"nodes.dmp.gz:md5,9e934f98f3c2ace17fa3d77eb235f96f",
"nodes.dmp.gz.md5:md5,a4597c31842067abe62dda359f8bd854",
[
"000005.log:md5,9167e183e1dc6070dbfc81c4674d9654",
"CURRENT:md5,6752a1d65b201c13b62ea44016eb221f",
"IDENTITY:md5,730568fb30c4bd8dc6db1b95d288b9dc",
"LOCK:md5,d41d8cd98f00b204e9800998ecf8427e",
"LOG:md5,a6e02bcd897ccd9a28a40cf3dd350292",
"MANIFEST-000004:md5,17211b4c15bffd11c80dfbd5b7db77ef",
"OPTIONS-000007:md5,22187b853e27d095e1eac121574b1d95"
],
"test.pkl:md5,d9f5e1d08d7b678281ac088cdca355c9"
]
],
"versions": [
"versions.yml:md5,175fb2cc18a30f7ab660efe1a42b7161"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.10.1"
},
"timestamp": "2024-11-22T14:42:14.067314457"
},
"sam2lca-updatedb - stub": {
"content": [
{
"0": [
[
"test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,175fb2cc18a30f7ab660efe1a42b7161"
],
"sam2lca_db": [
[
"test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,175fb2cc18a30f7ab660efe1a42b7161"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.10.1"
},
"timestamp": "2024-11-22T14:43:21.32148453"
}
}
Loading

0 comments on commit d090c73

Please sign in to comment.