diff --git a/bio/metadmg/compressbam/environment.yaml b/bio/metadmg/compressbam/environment.yaml
new file mode 100644
index 00000000000..7370c2427b7
--- /dev/null
+++ b/bio/metadmg/compressbam/environment.yaml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - metadmg =0.4
diff --git a/bio/metadmg/compressbam/meta.yaml b/bio/metadmg/compressbam/meta.yaml
new file mode 100644
index 00000000000..28428c35ff6
--- /dev/null
+++ b/bio/metadmg/compressbam/meta.yaml
@@ -0,0 +1,12 @@
+name: metaDMG compressbam
+url: https://github.com/metaDMG-dev/metaDMG-cpp
+description: metaDMG-cpp is a fast and efficient method for estimating mutation and damage rates in ancient DNA data
+authors:
+  - Filipe G. Vieira
+input:
+  - aln: SAM/BAM/CRAM file
+  - ref: reference file in FASTA format (mandatory if CRAM input)
+output:
+  - output BAM file
+params:
+  - extra: additional program arguments.
diff --git a/bio/metadmg/compressbam/test/Snakefile b/bio/metadmg/compressbam/test/Snakefile
new file mode 100644
index 00000000000..08e1095ac4b
--- /dev/null
+++ b/bio/metadmg/compressbam/test/Snakefile
@@ -0,0 +1,15 @@
+
+rule compressbam:
+    input:
+        aln="{sample}.bam",
+    output:
+        "results/compressbam/{sample}.bam",
+    log:
+        "logs/compressbam/{sample}.log",
+    params:
+        extra="",  # optional
+    threads: 4
+    resources:
+        mem_mb=1024,
+    wrapper:
+        "master/bio/metadmg/compressbam"
diff --git a/bio/metadmg/compressbam/test/a.bam b/bio/metadmg/compressbam/test/a.bam
new file mode 100644
index 00000000000..603edf6d5b2
Binary files /dev/null and b/bio/metadmg/compressbam/test/a.bam differ
diff --git a/bio/metadmg/compressbam/wrapper.py b/bio/metadmg/compressbam/wrapper.py
new file mode 100644
index 00000000000..205649a4eb8
--- /dev/null
+++ b/bio/metadmg/compressbam/wrapper.py
@@ -0,0 +1,23 @@
+__author__ = "Filipe G. Vieira"
+__copyright__ = "Copyright 2023, Filipe G. Vieira"
+__license__ = "MIT"
+
+
+import tempfile
+from pathlib import Path
+from snakemake.shell import shell
+
+
+extra = snakemake.params.get("extra", "")
+log = snakemake.log_fmt_shell(stdout=True, stderr=True)
+
+
+# The reference FASTA is optional: "--ref" is only passed when one is given.
+ref = snakemake.input.get("ref", "")
+if ref:
+    ref = f"--ref {ref}"
+
+
+shell(
+    "compressbam --threads {snakemake.threads} --input {snakemake.input.aln} {ref} {extra} --output {snakemake.output[0]} {log}"
+)
diff --git a/bio/metadmg/dfit/environment.yaml b/bio/metadmg/dfit/environment.yaml
new file mode 100644
index 00000000000..7370c2427b7
--- /dev/null
+++ b/bio/metadmg/dfit/environment.yaml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - metadmg =0.4
diff --git a/bio/metadmg/dfit/meta.yaml b/bio/metadmg/dfit/meta.yaml
new file mode 100644
index 00000000000..e095b157d4b
--- /dev/null
+++ b/bio/metadmg/dfit/meta.yaml
@@ -0,0 +1,19 @@
+name: metaDMG dfit
+url: https://github.com/metaDMG-dev/metaDMG-cpp
+description: metaDMG-cpp is a fast and efficient method for estimating mutation and damage rates in ancient DNA data
+authors:
+  - Filipe G. Vieira
+input:
+  - dmg: damage file to fit (positional argument of `metaDMG-cpp dfit`)
+  - names: taxonomy file "names.dmp" (optional)
+  - nodes: taxonomy file "nodes.dmp" (optional)
+  - lca_stats: stats file passed to the tool as `--lcastat` (optional)
+output:
+  - dfit: path to file with the damage fit results (tool output `.dfit.txt.gz`).
+  - stats_dfit: path to file with the fit statistics (tool output `.dfit.stat.txt.gz`).
+  - stats_boot: path to file with the bootstrap statistics (tool output `.boot.stat.txt.gz`).
+params:
+  - extra: additional program arguments.
+notes: |
+  * Input BAM file has to be sorted by read name.
+  * More information about output formats in https://github.com/metaDMG-dev/metaDMG-cpp/blob/master/doc/formats.pdf
diff --git a/bio/metadmg/dfit/test/Snakefile b/bio/metadmg/dfit/test/Snakefile
new file mode 100644
index 00000000000..e9fd2e58740
--- /dev/null
+++ b/bio/metadmg/dfit/test/Snakefile
@@ -0,0 +1,20 @@
+
+rule metadmg_dfit:
+    input:
+        dmg="{sample}.bam",
+        names="names.dmp.gz",
+        nodes="nodes.dmp.gz",
+        lca_stats="{sample}.stat",
+    output:
+        dfit="results/dfit/{sample}.out.gz",
+        stats_dfit="stats/dfit/{sample}.dfit.tsv.gz",
+        stats_boot="stats/dfit/{sample}.boot.tsv.gz",
+    log:
+        "logs/dfit/{sample}.log",
+    params:
+        extra="--nopt 10 --doboot 1 --nbootstrap 20 --showfits 2 --lib ds",
+    threads: 1
+    resources:
+        mem_mb=1024,
+    wrapper:
+        "master/bio/metadmg/dfit"
diff --git a/bio/metadmg/dfit/test/names.dmp.gz b/bio/metadmg/dfit/test/names.dmp.gz
new file mode 100644
index 00000000000..44fda1e8ca2
Binary files /dev/null and b/bio/metadmg/dfit/test/names.dmp.gz differ
diff --git a/bio/metadmg/dfit/test/nodes.dmp.gz b/bio/metadmg/dfit/test/nodes.dmp.gz
new file mode 100644
index 00000000000..90eaef09c24
Binary files /dev/null and b/bio/metadmg/dfit/test/nodes.dmp.gz differ
diff --git a/bio/metadmg/dfit/wrapper.py b/bio/metadmg/dfit/wrapper.py
new file mode 100644
index 00000000000..482bea487c7
--- /dev/null
+++ b/bio/metadmg/dfit/wrapper.py
@@ -0,0 +1,63 @@
+__author__ = "Filipe G. Vieira"
+__copyright__ = "Copyright 2023, Filipe G. Vieira"
+__license__ = "MIT"
+
+
+import tempfile
+from snakemake.shell import shell
+
+
+extra = snakemake.params.get("extra", "")
+log = snakemake.log_fmt_shell(stdout=True, stderr=True)
+
+
+# Suffix of each file "metaDMG-cpp dfit" writes under "--out_prefix", keyed by
+# the name of the wrapper output it is copied to.
+suffix_by_name = {
+    "dfit": ".dfit.txt.gz",
+    "stats_dfit": ".dfit.stat.txt.gz",
+    "stats_boot": ".boot.stat.txt.gz",
+}
+
+
+# Optional inputs are only passed on the command line when provided.
+names = snakemake.input.get("names", "")
+if names:
+    names = f"--names {names}"
+
+nodes = snakemake.input.get("nodes", "")
+if nodes:
+    nodes = f"--nodes {nodes}"
+
+lca_stats = snakemake.input.get("lca_stats", "")
+if lca_stats:
+    lca_stats = f"--lcastat {lca_stats}"
+
+
+# Map every declared output to the tool file it receives. A recognised file
+# suffix wins (so suffix-named outputs keep working); otherwise the output name
+# decides, which allows arbitrary file names. Unmappable outputs fail here,
+# before the tool runs, instead of silently never being created.
+copies = []
+for output in snakemake.output:
+    matches = [s for s in suffix_by_name.values() if output.endswith(s)]
+    if not matches:
+        matches = [
+            s for n, s in suffix_by_name.items() if snakemake.output.get(n) == output
+        ]
+    if not matches:
+        raise ValueError(
+            f"Cannot map output '{output}' to a file written by metaDMG-cpp: "
+            f"use one of the output names {sorted(suffix_by_name)} or one of "
+            f"the file suffixes {sorted(set(suffix_by_name.values()))}."
+        )
+    copies.append((output, matches[0]))
+
+
+with tempfile.TemporaryDirectory() as tmpdir:
+    shell(
+        "metaDMG-cpp dfit {snakemake.input.dmg} {names} {nodes} {lca_stats} {extra} --out_prefix {tmpdir}/out {log}"
+    )
+
+    for output, ext in copies:
+        shell("cat {tmpdir}/out{ext} > {output}")
diff --git a/bio/metadmg/getdamage/environment.yaml b/bio/metadmg/getdamage/environment.yaml
new file mode 100644
index 00000000000..7370c2427b7
--- /dev/null
+++ b/bio/metadmg/getdamage/environment.yaml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - metadmg =0.4
diff --git a/bio/metadmg/getdamage/meta.yaml b/bio/metadmg/getdamage/meta.yaml
new file mode 100644
index 00000000000..7790531c7f9
--- /dev/null
+++ b/bio/metadmg/getdamage/meta.yaml
@@ -0,0 +1,17 @@
+name: metaDMG getdamage
+url: https://github.com/metaDMG-dev/metaDMG-cpp
+description: metaDMG-cpp is a fast and efficient method for estimating mutation and damage rates in ancient DNA data
+authors:
+  - Filipe G. Vieira
+input:
+  - aln: SAM/BAM/CRAM file
+  - ref: reference file in FASTA format (mandatory if CRAM input)
+output:
+  - dmg: path to TSV file containing counts of mismatches conditional on strand and cycle.
+  - res: path to TSV file with estimates of damage.
+  - stat: path to TSV file with general stats.
+params:
+  - extra: additional program arguments.
+notes: |
+  * Input BAM file has to be sorted by read name.
+  * More information about output formats in https://github.com/metaDMG-dev/metaDMG-cpp/blob/master/doc/formats.pdf
diff --git a/bio/metadmg/getdamage/test/Snakefile b/bio/metadmg/getdamage/test/Snakefile
new file mode 100644
index 00000000000..085c68adc08
--- /dev/null
+++ b/bio/metadmg/getdamage/test/Snakefile
@@ -0,0 +1,17 @@
+
+rule metadmg_getdamage:
+    input:
+        aln="{sample}.bam",
+    output:
+        res="results/getdamage/{sample}.out.gz",
+        dmg="results/getdamage/{sample}.dmg.gz",
+        stats="stats/getdamage/{sample}.tsv",
+    log:
+        "logs/getdamage/{sample}.log",
+    params:
+        extra="--min_length 30 --print_length 30 --run_mode 1",
+    threads: 4
+    resources:
+        mem_mb=1024,
+    wrapper:
+        "master/bio/metadmg/getdamage"
diff --git a/bio/metadmg/getdamage/test/a.bam b/bio/metadmg/getdamage/test/a.bam
new file mode 100644
index 00000000000..01219f016b7
Binary files /dev/null and b/bio/metadmg/getdamage/test/a.bam differ
diff --git a/bio/metadmg/getdamage/wrapper.py b/bio/metadmg/getdamage/wrapper.py
new file mode 100644
index 00000000000..98b61b0388d
--- /dev/null
+++ b/bio/metadmg/getdamage/wrapper.py
@@ -0,0 +1,56 @@
+__author__ = "Filipe G. Vieira"
+__copyright__ = "Copyright 2023, Filipe G. Vieira"
+__license__ = "MIT"
+
+
+import tempfile
+from snakemake.shell import shell
+
+
+extra = snakemake.params.get("extra", "")
+log = snakemake.log_fmt_shell(stdout=True, stderr=True)
+
+
+# Suffix of each file "metaDMG-cpp getdamage" writes under "--out_prefix", keyed
+# by the name of the wrapper output it is copied to ("stats" is an alias).
+suffix_by_name = {
+    "dmg": ".bdamage.gz",
+    "res": ".res.gz",
+    "stat": ".stat",
+    "stats": ".stat",
+}
+
+
+# The reference FASTA is optional: "--fasta" is only passed when one is given.
+ref = snakemake.input.get("ref", "")
+if ref:
+    ref = f"--fasta {ref}"
+
+
+# Map every declared output to the tool file it receives. A recognised file
+# suffix wins (so suffix-named outputs keep working); otherwise the output name
+# decides, which allows arbitrary file names. Unmappable outputs fail here,
+# before the tool runs, instead of silently never being created.
+copies = []
+for output in snakemake.output:
+    matches = [s for s in suffix_by_name.values() if output.endswith(s)]
+    if not matches:
+        matches = [
+            s for n, s in suffix_by_name.items() if snakemake.output.get(n) == output
+        ]
+    if not matches:
+        raise ValueError(
+            f"Cannot map output '{output}' to a file written by metaDMG-cpp: "
+            f"use one of the output names {sorted(suffix_by_name)} or one of "
+            f"the file suffixes {sorted(set(suffix_by_name.values()))}."
+        )
+    copies.append((output, matches[0]))
+
+
+with tempfile.TemporaryDirectory() as tmpdir:
+    shell(
+        "metaDMG-cpp getdamage --threads {snakemake.threads} {ref} {extra} --out_prefix {tmpdir}/out {snakemake.input.aln} {log}"
+    )
+
+    for output, ext in copies:
+        shell("cat {tmpdir}/out{ext} > {output}")
diff --git a/bio/metadmg/lca/environment.yaml b/bio/metadmg/lca/environment.yaml
new file mode 100644
index 00000000000..7370c2427b7
--- /dev/null
+++ b/bio/metadmg/lca/environment.yaml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - nodefaults
+dependencies:
+  - metadmg =0.4
diff --git a/bio/metadmg/lca/meta.yaml b/bio/metadmg/lca/meta.yaml
new file mode 100644
index 00000000000..60c6551893e
--- /dev/null
+++ b/bio/metadmg/lca/meta.yaml
@@ -0,0 +1,19 @@
+name: metaDMG lca
+url: https://github.com/metaDMG-dev/metaDMG-cpp
+description: metaDMG-cpp is a fast and efficient method for estimating mutation and damage rates in ancient DNA data
+authors:
+  - Filipe G. Vieira
+input:
+  - aln: SAM/BAM/CRAM file
+  - names: taxonomy file "names.dmp"
+  - nodes: taxonomy file "nodes.dmp"
+  - acc2taxid: TSV with correspondence between accessions and taxa IDs
+output:
+  - dmg: path to TSV file containing counts of mismatches conditional on strand and cycle.
+  - lca: path to TSV file with LCA results.
+  - stat: path to TSV file with general stats.
+params:
+  - extra: additional program arguments.
+notes: |
+  * Input BAM file has to be sorted by read name.
+  * More information about output formats in https://github.com/metaDMG-dev/metaDMG-cpp/blob/master/doc/formats.pdf
diff --git a/bio/metadmg/lca/test/Snakefile b/bio/metadmg/lca/test/Snakefile
new file mode 100644
index 00000000000..65cf4733b38
--- /dev/null
+++ b/bio/metadmg/lca/test/Snakefile
@@ -0,0 +1,20 @@
+
+rule metadmg_lca:
+    input:
+        aln="{sample}.bam",
+        names="names.dmp.gz",
+        nodes="nodes.dmp.gz",
+        acc2taxid="acc2taxid.tsv",
+    output:
+        dmg="results/lca/{sample}.out.gz",
+        lca="results/lca/{sample}.lca.gz",
+        stats="stats/lca/{sample}.tsv",
+    log:
+        "logs/lca/{sample}.log",
+    params:
+        extra="--sim_score_low 0.95 --sim_score_high 1.0 --min_mapq 30 --how_many 30 --lca_rank genus --fix_ncbi 0",
+    threads: 4
+    resources:
+        mem_mb=1024,
+    wrapper:
+        "master/bio/metadmg/lca"
diff --git a/bio/metadmg/lca/test/a.bam b/bio/metadmg/lca/test/a.bam
new file mode 100644
index 00000000000..01219f016b7
Binary files /dev/null and b/bio/metadmg/lca/test/a.bam differ
diff --git a/bio/metadmg/lca/test/acc2taxid.tsv b/bio/metadmg/lca/test/acc2taxid.tsv
new file mode 100644
index 00000000000..562f43d62ab
--- /dev/null
+++ b/bio/metadmg/lca/test/acc2taxid.tsv
@@ -0,0 +1,2 @@
+accession	accession.version	taxid	gi
+NC_023100	NC_023100.1	1425170	1
diff --git a/bio/metadmg/lca/test/names.dmp.gz b/bio/metadmg/lca/test/names.dmp.gz
new file mode 100644
index 00000000000..c7fd91932ea
Binary files /dev/null and b/bio/metadmg/lca/test/names.dmp.gz differ
diff --git a/bio/metadmg/lca/test/nodes.dmp.gz b/bio/metadmg/lca/test/nodes.dmp.gz
new file mode 100644
index 00000000000..90eaef09c24
Binary files /dev/null and b/bio/metadmg/lca/test/nodes.dmp.gz differ
diff --git a/bio/metadmg/lca/wrapper.py b/bio/metadmg/lca/wrapper.py
new file mode 100644
index 00000000000..efff62826c6
--- /dev/null
+++ b/bio/metadmg/lca/wrapper.py
@@ -0,0 +1,50 @@
+__author__ = "Filipe G. Vieira"
+__copyright__ = "Copyright 2023, Filipe G. Vieira"
+__license__ = "MIT"
+
+
+import tempfile
+from snakemake.shell import shell
+
+
+extra = snakemake.params.get("extra", "")
+log = snakemake.log_fmt_shell(stdout=True, stderr=True)
+
+
+# Suffix of each file "metaDMG-cpp lca" writes under "--out_prefix", keyed by
+# the name of the wrapper output it is copied to ("stats" is an alias).
+suffix_by_name = {
+    "dmg": ".bdamage.gz",
+    "lca": ".lca.gz",
+    "stat": ".stat",
+    "stats": ".stat",
+}
+
+
+# Map every declared output to the tool file it receives. A recognised file
+# suffix wins (so suffix-named outputs keep working); otherwise the output name
+# decides, which allows arbitrary file names. Unmappable outputs fail here,
+# before the tool runs, instead of silently never being created.
+copies = []
+for output in snakemake.output:
+    matches = [s for s in suffix_by_name.values() if output.endswith(s)]
+    if not matches:
+        matches = [
+            s for n, s in suffix_by_name.items() if snakemake.output.get(n) == output
+        ]
+    if not matches:
+        raise ValueError(
+            f"Cannot map output '{output}' to a file written by metaDMG-cpp: "
+            f"use one of the output names {sorted(suffix_by_name)} or one of "
+            f"the file suffixes {sorted(set(suffix_by_name.values()))}."
+        )
+    copies.append((output, matches[0]))
+
+
+with tempfile.TemporaryDirectory() as tmpdir:
+    shell(
+        "metaDMG-cpp lca --threads {snakemake.threads} --bam {snakemake.input.aln} --names {snakemake.input.names} --nodes {snakemake.input.nodes} --acc2tax {snakemake.input.acc2taxid} --temp {tmpdir} {extra} --out_prefix {tmpdir}/out {log}"
+    )
+
+    for output, ext in copies:
+        shell("cat {tmpdir}/out{ext} > {output}")
diff --git a/test.py b/test.py
index 5fd7b236c4f..85b4ae2459b 100644
--- a/test.py
+++ b/test.py
@@ -139,6 +139,72 @@ def run(wrapper, cmd, check_log=None):
             os.chdir(origdir)
 
 
+@skip_if_not_modified
+def test_metadmg_getdamage():
+    run(
+        "bio/metadmg/getdamage",
+        [
+            "snakemake",
+            "--cores",
+            "1",
+            "--use-conda",
+            "-F",
+            "results/getdamage/a.out.gz",
+            "results/getdamage/a.dmg.gz",
+            "stats/getdamage/a.tsv",
+        ],
+    )
+
+
+@skip_if_not_modified
+def test_metadmg_lca():
+    run(
+        "bio/metadmg/lca",
+        [
+            "snakemake",
+            "--cores",
+            "1",
+            "--use-conda",
+            "-F",
+            "results/lca/a.out.gz",
+            "results/lca/a.lca.gz",
+            "stats/lca/a.tsv",
+        ],
+    )
+
+
+@skip_if_not_modified
+def test_metadmg_dfit():
+    run(
+        "bio/metadmg/dfit",
+        [
+            "snakemake",
+            "--cores",
+            "1",
+            "--use-conda",
+            "-F",
+            "results/dfit/a.out.gz",
+            "stats/dfit/a.dfit.tsv.gz",
+            "stats/dfit/a.boot.tsv.gz",
+        ],
+    )
+
+
+@skip_if_not_modified
+def test_metadmg_compressbam():
+    run(
+        "bio/metadmg/compressbam",
+        [
+            "snakemake",
+            "--cores",
+            "1",
+            "--use-conda",
+            "-F",
+            "results/compressbam/a.bam",
+        ],
+    )
+
+
 @skip_if_not_modified
 def test_galah():
     run(