-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
444 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
import pysam | ||
from pathlib import Path | ||
|
||
|
||
def parse_args(argv=None):
    """Parse the command-line arguments of the acc2tax script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``genome`` (a ``Path``) and ``taxid`` (an
        ``int``).
    """
    # The first positional parameter of ArgumentParser is ``prog``; the text
    # is a description, so pass it by keyword and keep the real program name
    # in the usage line.
    parser = argparse.ArgumentParser(description="Create acc2tax file")
    parser.add_argument("genome", type=Path, help="Path to genome file")
    # The taxid is used for both the output file name and the taxid column,
    # so a missing value would silently produce "None" in both places.
    parser.add_argument(
        "-t", type=int, dest="taxid", required=True, help="taxid"
    )

    return parser.parse_args(argv)
|
||
|
||
def acc2tax(genome, taxid):
    """Write ``<taxid>.accession2taxid`` for every sequence in *genome*.

    Sequence names are read with ``pysam.FastxFile``. Each name is used as
    the ``accession.version`` column, and the part before its first "." as
    the ``accession`` column. Repeated names are written only once.

    Args:
        genome: Path of the sequence file handed to ``pysam.FastxFile``.
        taxid: Value written in the ``taxid`` column and used as the output
            file name prefix.
    """
    # Map accession.version -> accession; a dict collapses duplicate names
    # while keeping first-seen order.
    with pysam.FastxFile(genome) as records:
        accession_of = {
            record.name: record.name.split(".")[0] for record in records
        }

    rows = [
        f"{accession}\t{versioned}\t{taxid}\n"
        for versioned, accession in accession_of.items()
    ]

    with open(f"{taxid}.accession2taxid", "w") as table:
        table.write("accession\taccession.version\ttaxid\n")
        table.writelines(rows)
|
||
|
||
# Script entry point: read the CLI options and write the mapping table.
if __name__ == "__main__":
    cli = parse_args()
    acc2tax(genome=cli.genome, taxid=cli.taxid)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/usr/bin/env python | ||
|
||
|
||
import os | ||
import json | ||
import argparse | ||
from pathlib import Path | ||
|
||
|
||
def parse_args(argv=None):
    """Parse the command-line arguments of the sam2lca JSON script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``acc2taxid`` and ``md5``, both ``Path``
        objects.
    """
    # The first positional parameter of ArgumentParser is ``prog``; the text
    # is a description, so pass it by keyword and keep the real program name
    # in the usage line.
    parser = argparse.ArgumentParser(description="Create sam2lca json file")
    parser.add_argument(
        "acc2taxid", type=Path, help="Path to accession2taxid gzip compressed file"
    )
    parser.add_argument(
        "md5",
        type=Path,
        help="Path to accession2taxid gzip compressed md5 checksum file",
    )

    return parser.parse_args(argv)
|
||
|
||
def write_json(acc2taxid, md5, db_name="adnamap"):
    """Write ``<db_name>.sam2lca.json`` into the current directory.

    The JSON object has three keys, each mapping *db_name* to a value:
    ``mapfiles`` (a one-item list with the POSIX form of *acc2taxid*),
    ``mapmd5`` (a one-item list with the POSIX form of *md5*) and
    ``map_db`` (the string ``<db_name>.db``).

    Args:
        acc2taxid: Path object of the accession2taxid file.
        md5: Path object of the matching md5 checksum file.
        db_name: Name used as the inner key and as the output file prefix.
    """
    config = {}
    config["mapfiles"] = {db_name: [acc2taxid.as_posix()]}
    config["mapmd5"] = {db_name: [md5.as_posix()]}
    config["map_db"] = {db_name: f"{db_name}.db"}

    serialized = json.dumps(config)
    with open(f"{db_name}.sam2lca.json", "w") as out:
        out.write(serialized)
|
||
|
||
# Script entry point: read the CLI options and write the sam2lca JSON file.
if __name__ == "__main__":
    cli = parse_args()
    write_json(acc2taxid=cli.acc2taxid, md5=cli.md5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
// Runs create_acc2tax.py on one genome FASTA to produce an accession2taxid table.
process CREATE_ACC2TAX {
    tag "${meta.genome_name}"
    label 'process_single'

    conda (params.enable_conda ? "bioconda::sam2lca=1.1.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sam2lca:1.1.4--pyhdfd78af_0' :
        'quay.io/biocontainers/sam2lca:1.1.4--pyhdfd78af_0' }"

    input:
    // meta is read for genome_name (tag) and taxid (command line)
    tuple val(meta), path(fasta)

    output:
    path("*.accession2taxid"), emit: acc2tax

    script:
    def args = task.ext.args ?: ""

    """
    create_acc2tax.py $fasta -t ${meta.taxid}
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// Compresses an accession2taxid table, checksums it, and writes the sam2lca
// JSON descriptor (via sam2lca_json.py) that points at both files.
process SAM2LCA_PREPDB {
    label 'process_single'

    // Conda takes "name=version"; the "--pyhdfd78af_0" suffix is the container
    // tag syntax and is not a valid conda match spec.
    conda (params.enable_conda ? "bioconda::sam2lca=1.1.4" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sam2lca:1.1.4--pyhdfd78af_0' :
        'quay.io/biocontainers/sam2lca:1.1.4--pyhdfd78af_0' }"

    input:
    path(acc2tax)

    output:
    path("*.md5"),  emit: acc2tax_md5
    path("*.json"), emit: acc2tax_json
    path("*.gz"),   emit: acc2tax_gz

    script:
    def args = task.ext.args ?: ""

    // Write the archive through stdout rather than compressing in place:
    // staged inputs are normally symlinks, which in-place gzip may refuse,
    // and this also leaves the staged input untouched.
    """
    gzip -c $acc2tax > ${acc2tax}.gz
    md5sum ${acc2tax}.gz > ${acc2tax}.gz.md5
    sam2lca_json.py ${acc2tax}.gz ${acc2tax}.gz.md5
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
dependencies: | ||
- "bioconda::sam2lca=1.1.4" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Builds a sam2lca database directory with `sam2lca update-db`.
process SAM2LCA_UPDATEDB {
    tag "${acc2tax_name}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/sam2lca:1.1.4--pyhdfd78af_0':
        'biocontainers/sam2lca:1.1.4--pyhdfd78af_0' }"

    input:
    val(acc2tax_name)
    val(taxo_db_name)
    path(taxo_nodes)   // nodes.dmp
    path(taxo_names)   // names.dmp
    path(taxo_merged)  // merged.dmp
    path(acc2tax_json) // optional
    path(acc2tax)      // acc2tax.gz
    path(acc2tax_md5)  // acc2tax.gz.md5

    output:
    path "sam2lca_db"  , emit: sam2lca_db
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // Each optional file becomes a CLI flag only when a value was supplied.
    def nodes  = taxo_nodes   ? "--taxo_nodes ${taxo_nodes}"     : ''
    def names  = taxo_names   ? "--taxo_names ${taxo_names}"     : ''
    def merged = taxo_merged  ? "--taxo_merged ${taxo_merged}"   : ''
    def json   = acc2tax_json ? "--acc2tax_json ${acc2tax_json}" : ''
    """
    mkdir -p sam2lca_db
    sam2lca -d sam2lca_db \\
        update-db \\
        -t $taxo_db_name \\
        $names \\
        $nodes \\
        $merged \\
        -a $acc2tax_name \\
        $json \\
        $args
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sam2lca: \$(echo \$(sam2lca --version 2>&1) | sed 's/^sam2lca, version //' )
    END_VERSIONS
    """

    stub:
    """
    mkdir -p sam2lca_db
    touch sam2lca_db/test.pkl
    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sam2lca: \$(echo \$(sam2lca --version 2>&1) | sed 's/^sam2lca, version //' )
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
name: "sam2lca_updatedb" | ||
description: Build sam2lca database for calling lowest common ancestors from multi-mapped reads in SAM/BAM/CRAM | ||
files | ||
keywords: | ||
- LCA | ||
- alignment | ||
- bam | ||
- metagenomics | ||
- Ancestor | ||
- multimapper | ||
- build | ||
- database | ||
tools: | ||
- "sam2lca": | ||
description: "Lowest Common Ancestor on SAM/BAM/CRAM alignment files" | ||
homepage: "https://github.com/maxibor/sam2lca" | ||
documentation: "https://sam2lca.readthedocs.io" | ||
doi: "10.21105/joss.04360" | ||
licence: ["GPL v3"] | ||
identifier: "" | ||
|
||
input: | ||
- - acc2tax_name: | ||
type: string | ||
description: Name of accession2taxid type to use | ||
- - taxo_db_name: | ||
type: string | ||
description: Name of taxonomy database type to use | ||
- - taxo_nodes: | ||
type: file | ||
description: "NCBI taxonomy nodes file" | ||
pattern: "*.dmp" | ||
ontologies: | ||
- edam: http://edamontology.org/format_2330 | ||
- - taxo_names: | ||
type: file | ||
description: NCBI taxonomy names file | ||
pattern: "*.dmp" | ||
ontologies: | ||
- edam: http://edamontology.org/format_2330 | ||
- - taxo_merged: | ||
type: file | ||
description: NCBI taxonomy merged file | ||
pattern: "*.dmp" | ||
ontologies: | ||
- edam: http://edamontology.org/format_2330 | ||
- - acc2tax_json: | ||
type: file | ||
description: JSON file listing accession2taxid mapping files. Only required if using a custom database | ||
pattern: "*.json" | ||
ontologies: | ||
- edam: "http://edamontology.org/format_3464" | ||
- - acc2tax: | ||
type: file | ||
description: accession2taxid mapping file compressed with gzip. Only required if using a custom database | ||
pattern: "*.gz" | ||
ontologies: | ||
- edam: http://edamontology.org/format_3989 | ||
- - acc2tax_md5: | ||
type: file | ||
description: MD5 checksum of the accession2taxid mapping file. Only required if using a custom database | ||
pattern: "*.md5" | ||
ontologies: | ||
- edam: http://edamontology.org/format_2330 | ||
|
||
output: | ||
- sam2lca_db: | ||
- sam2lca_db: | ||
type: directory | ||
description: "sam2lca database" | ||
- versions: | ||
- "versions.yml": | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
|
||
authors: | ||
- "@maxibor" | ||
maintainers: | ||
- "@maxibor" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
// nf-test suite for the SAM2LCA_UPDATEDB process.
nextflow_process {

    name "Test Process SAM2LCA_UPDATEDB"
    script "../main.nf"
    process "SAM2LCA_UPDATEDB"

    tag "modules"
    tag "modules_nfcore"
    tag "sam2lca"
    tag "sam2lca/updatedb"

    test("test-sam2lca-updatedb - test dataset") {
        when {
            process {
                // Inputs 0-1 are the two name values; every file input
                // (2-7) is passed as an empty list.
                """
                input[0] = 'test'
                input[1] = 'test'
                input[2] = []
                input[3] = []
                input[4] = []
                input[5] = []
                input[6] = []
                input[7] = []
                """
            }
        }

        then {
            // Only successful completion is checked; no snapshot comparison.
            assertAll(
                { assert process.success }
            )
        }
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
{ | ||
"test-sam2lca-updatedb - test taxonomy": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
"merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
"merged.dmp.gz:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
"merged.dmp.gz.md5:md5,f6e01130c21a58a3371eddec53a18f6f", | ||
"names.dmp:md5,e7994ec89470481e031b3ecef616e778", | ||
"names.dmp.gz:md5,e7994ec89470481e031b3ecef616e778", | ||
"names.dmp.gz.md5:md5,ce7546bbac7dcbe5c0054975538d7fb7", | ||
"nodes.dmp:md5,9e934f98f3c2ace17fa3d77eb235f96f", | ||
"nodes.dmp.gz:md5,9e934f98f3c2ace17fa3d77eb235f96f", | ||
"nodes.dmp.gz.md5:md5,a4597c31842067abe62dda359f8bd854", | ||
[ | ||
"000005.log:md5,9167e183e1dc6070dbfc81c4674d9654", | ||
"CURRENT:md5,6752a1d65b201c13b62ea44016eb221f", | ||
"IDENTITY:md5,730568fb30c4bd8dc6db1b95d288b9dc", | ||
"LOCK:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
"LOG:md5,a6e02bcd897ccd9a28a40cf3dd350292", | ||
"MANIFEST-000004:md5,17211b4c15bffd11c80dfbd5b7db77ef", | ||
"OPTIONS-000007:md5,22187b853e27d095e1eac121574b1d95" | ||
], | ||
"test.pkl:md5,d9f5e1d08d7b678281ac088cdca355c9" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,175fb2cc18a30f7ab660efe1a42b7161" | ||
], | ||
"sam2lca_db": [ | ||
[ | ||
"merged.dmp:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
"merged.dmp.gz:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
"merged.dmp.gz.md5:md5,f6e01130c21a58a3371eddec53a18f6f", | ||
"names.dmp:md5,e7994ec89470481e031b3ecef616e778", | ||
"names.dmp.gz:md5,e7994ec89470481e031b3ecef616e778", | ||
"names.dmp.gz.md5:md5,ce7546bbac7dcbe5c0054975538d7fb7", | ||
"nodes.dmp:md5,9e934f98f3c2ace17fa3d77eb235f96f", | ||
"nodes.dmp.gz:md5,9e934f98f3c2ace17fa3d77eb235f96f", | ||
"nodes.dmp.gz.md5:md5,a4597c31842067abe62dda359f8bd854", | ||
[ | ||
"000005.log:md5,9167e183e1dc6070dbfc81c4674d9654", | ||
"CURRENT:md5,6752a1d65b201c13b62ea44016eb221f", | ||
"IDENTITY:md5,730568fb30c4bd8dc6db1b95d288b9dc", | ||
"LOCK:md5,d41d8cd98f00b204e9800998ecf8427e", | ||
"LOG:md5,a6e02bcd897ccd9a28a40cf3dd350292", | ||
"MANIFEST-000004:md5,17211b4c15bffd11c80dfbd5b7db77ef", | ||
"OPTIONS-000007:md5,22187b853e27d095e1eac121574b1d95" | ||
], | ||
"test.pkl:md5,d9f5e1d08d7b678281ac088cdca355c9" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,175fb2cc18a30f7ab660efe1a42b7161" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "24.10.1" | ||
}, | ||
"timestamp": "2024-11-22T14:42:14.067314457" | ||
}, | ||
"sam2lca-updatedb - stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
"test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"1": [ | ||
"versions.yml:md5,175fb2cc18a30f7ab660efe1a42b7161" | ||
], | ||
"sam2lca_db": [ | ||
[ | ||
"test.pkl:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,175fb2cc18a30f7ab660efe1a42b7161" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.8.4", | ||
"nextflow": "24.10.1" | ||
}, | ||
"timestamp": "2024-11-22T14:43:21.32148453" | ||
} | ||
} |
Oops, something went wrong.