From 1fc2cb4565f4987246915e46cb350cb9bf1d94fe Mon Sep 17 00:00:00 2001 From: fgvieira <1151762+fgvieira@users.noreply.github.com> Date: Tue, 19 Sep 2023 16:19:55 +0200 Subject: [PATCH 1/4] Add wrapper for metaDMG --- bio/metadmg/compressbam/environment.yaml | 6 +++ bio/metadmg/compressbam/meta.yaml | 12 ++++++ bio/metadmg/compressbam/test/Snakefile | 15 +++++++ bio/metadmg/compressbam/test/a.bam | Bin 0 -> 521 bytes bio/metadmg/compressbam/wrapper.py | 22 ++++++++++ bio/metadmg/getdamage/environment.yaml | 6 +++ bio/metadmg/getdamage/meta.yaml | 17 ++++++++ bio/metadmg/getdamage/test/Snakefile | 18 ++++++++ bio/metadmg/getdamage/test/a.bam | Bin 0 -> 521 bytes bio/metadmg/getdamage/test/genome.fasta | 4 ++ bio/metadmg/getdamage/wrapper.py | 32 +++++++++++++++ bio/metadmg/lca/environment.yaml | 6 +++ bio/metadmg/lca/meta.yaml | 20 +++++++++ bio/metadmg/lca/test/Snakefile | 18 ++++++++ bio/metadmg/lca/test/a.bam | Bin 0 -> 521 bytes bio/metadmg/lca/wrapper.py | 31 ++++++++++++++ test.py | 50 +++++++++++++++++++++++ 17 files changed, 257 insertions(+) create mode 100644 bio/metadmg/compressbam/environment.yaml create mode 100644 bio/metadmg/compressbam/meta.yaml create mode 100644 bio/metadmg/compressbam/test/Snakefile create mode 100644 bio/metadmg/compressbam/test/a.bam create mode 100644 bio/metadmg/compressbam/wrapper.py create mode 100644 bio/metadmg/getdamage/environment.yaml create mode 100644 bio/metadmg/getdamage/meta.yaml create mode 100644 bio/metadmg/getdamage/test/Snakefile create mode 100644 bio/metadmg/getdamage/test/a.bam create mode 100644 bio/metadmg/getdamage/test/genome.fasta create mode 100644 bio/metadmg/getdamage/wrapper.py create mode 100644 bio/metadmg/lca/environment.yaml create mode 100644 bio/metadmg/lca/meta.yaml create mode 100644 bio/metadmg/lca/test/Snakefile create mode 100644 bio/metadmg/lca/test/a.bam create mode 100644 bio/metadmg/lca/wrapper.py diff --git a/bio/metadmg/compressbam/environment.yaml 
b/bio/metadmg/compressbam/environment.yaml new file mode 100644 index 00000000000..1d9b3558d4d --- /dev/null +++ b/bio/metadmg/compressbam/environment.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - metadmg =0.3 diff --git a/bio/metadmg/compressbam/meta.yaml b/bio/metadmg/compressbam/meta.yaml new file mode 100644 index 00000000000..28428c35ff6 --- /dev/null +++ b/bio/metadmg/compressbam/meta.yaml @@ -0,0 +1,12 @@ +name: CompressBam +url: https://github.com/metaDMG-dev/metaDMG-cpp +description: metaDMG-cpp is a fast and efficient method for estimating mutation and damage rates in ancient DNA data +authors: + - Filipe G. Vieira +input: + - aln: SAM/BAM/CRAM file + - ref: reference file in FASTA format (mandatory if CRAM input) +output: + - output BAM file +params: + - extra: additional program arguments. diff --git a/bio/metadmg/compressbam/test/Snakefile b/bio/metadmg/compressbam/test/Snakefile new file mode 100644 index 00000000000..de64b3a5a90 --- /dev/null +++ b/bio/metadmg/compressbam/test/Snakefile @@ -0,0 +1,15 @@ + +rule compressbam: + input: + aln="{sample}.bam", + output: + "results/compressbam/{sample}.bam", + log: + "logs/compressbam/{sample}.log", + params: + extra="", # optional + threads: 4 + resources: + mem_mb=1024, + wrapper: + "master/bio/metadmg/compressbam" diff --git a/bio/metadmg/compressbam/test/a.bam b/bio/metadmg/compressbam/test/a.bam new file mode 100644 index 0000000000000000000000000000000000000000..a407ae2040d5f118d107ef42cdafa491bf9aa549 GIT binary patch literal 521 zcmb2|=3rp}f&Xj_PR>jW$qdCsUsAWECnOYb@IB%Awpq)|_wkpHn~e;gI&nxey7KIh zX}V)+8@r}RyIZ)JyC}%Ax%Od>p4tLI9S5F}a|^haL@MSev+=UAu`y&El!{FO8ZD1z zM+al?r2T$}4Fry@*L4hOcp=Knws6J72`6sU8>lZ7+90a6z<@ExTx->)17gOhZ>&E! 
ztvO@EG3Ro2s^!`!@A)!w-`@W|cdla3ajlbu`ltD74w_x>^EkdFacflb3_GVF2d&Ag zmbiF`D23F0m@a!a?@HuC--y%A4U94K?Z4RHoA;}Ey4;>khoyFleYLyvM5ibAb&0?M zW!p(*m6ucaDtHY;7O8HW(wPwDHZ67D>}@*I1&7NH9oJcUc#7__7i{L6rlfiOc)B2W zlE}XIuAdLJ_Snv8({41@j_EI46?)R)Tu=Jt2{KD{i+sy}Isd-AT;^qWx03$6nNPdI zifh(PNK&X}Gd^9-%e1Py+eK4nu}k@}4exc2&vGcw$&kGjlCRq7@+IKpjo6mjv?Ygc zFr8&L+cwc`&H5kF|2Ol5KMJmVcPDYzoz31CIriu8Hc52(s%hD?VUx(8zK5^bVs3p9 w?pT?(;cx1N?LGHQE>xTS5@8E5Ke1Hx#Q#74zn?J!Ml*UuNHZ{l;~qo+0Fsg3fB*mh literal 0 HcmV?d00001 diff --git a/bio/metadmg/compressbam/wrapper.py b/bio/metadmg/compressbam/wrapper.py new file mode 100644 index 00000000000..1c787d3c016 --- /dev/null +++ b/bio/metadmg/compressbam/wrapper.py @@ -0,0 +1,22 @@ +__author__ = "Filipe G. Vieira" +__copyright__ = "Copyright 2023, Filipe G. Vieira" +__license__ = "MIT" + + +import tempfile +from snakemake.shell import shell + + +extra = snakemake.params.get("extra", "") +log = snakemake.log_fmt_shell(stdout=True, stderr=True) + + +ref = snakemake.input.get("ref", "") +if ref: + ref = f"--ref {ref}" + + +out_fmt = Path(snakemake.input.aln).suffix + + +shell("compressbam --threads {snakemake.threads} -hts {snakemake.input.aln} {ref} {extra} -type {out_fmt} -out {snakemake.output[0]} {log}") diff --git a/bio/metadmg/getdamage/environment.yaml b/bio/metadmg/getdamage/environment.yaml new file mode 100644 index 00000000000..1d9b3558d4d --- /dev/null +++ b/bio/metadmg/getdamage/environment.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - metadmg =0.3 diff --git a/bio/metadmg/getdamage/meta.yaml b/bio/metadmg/getdamage/meta.yaml new file mode 100644 index 00000000000..7790531c7f9 --- /dev/null +++ b/bio/metadmg/getdamage/meta.yaml @@ -0,0 +1,17 @@ +name: metaDMG getdamage +url: https://github.com/metaDMG-dev/metaDMG-cpp +description: metaDMG-cpp is a fast and efficient method for estimating mutation and damage rates in ancient DNA data +authors: + - Filipe G. 
Vieira +input: + - aln: SAM/BAM/CRAM file + - ref: reference file in FASTA format (mandatory if CRAM input) +output: + - dmg: path to TSV file containing counts of mismatchs conditional on strand and cycle. + - res: path to TSV file with estimates of damage. + - stat: path to TSV file with general stats. +params: + - extra: additional program arguments. +notes: | + * Input BAM file has to be sorted by read name. + * More information about output formats in https://github.com/metaDMG-dev/metaDMG-cpp/blob/master/doc/formats.pdf diff --git a/bio/metadmg/getdamage/test/Snakefile b/bio/metadmg/getdamage/test/Snakefile new file mode 100644 index 00000000000..b59c82f1bce --- /dev/null +++ b/bio/metadmg/getdamage/test/Snakefile @@ -0,0 +1,18 @@ + +rule metadmg_getdamage: + input: + aln="{sample}.bam", + ref="genome.fasta", + output: + res="results/getdamage/{sample}.out.gz", + dmg="results/getdamage/{sample}.dmg.gz", + stats="results/getdamage/{sample}.tsv", + log: + "logs/getdamage/{sample}.log", + params: + extra="--minlength 30 --printlength 30 --runmode 1", + threads: 4 + resources: + mem_mb=1024, + wrapper: + "master/bio/metadmg/getdamage" diff --git a/bio/metadmg/getdamage/test/a.bam b/bio/metadmg/getdamage/test/a.bam new file mode 100644 index 0000000000000000000000000000000000000000..a407ae2040d5f118d107ef42cdafa491bf9aa549 GIT binary patch literal 521 zcmb2|=3rp}f&Xj_PR>jW$qdCsUsAWECnOYb@IB%Awpq)|_wkpHn~e;gI&nxey7KIh zX}V)+8@r}RyIZ)JyC}%Ax%Od>p4tLI9S5F}a|^haL@MSev+=UAu`y&El!{FO8ZD1z zM+al?r2T$}4Fry@*L4hOcp=Knws6J72`6sU8>lZ7+90a6z<@ExTx->)17gOhZ>&E! 
ztvO@EG3Ro2s^!`!@A)!w-`@W|cdla3ajlbu`ltD74w_x>^EkdFacflb3_GVF2d&Ag zmbiF`D23F0m@a!a?@HuC--y%A4U94K?Z4RHoA;}Ey4;>khoyFleYLyvM5ibAb&0?M zW!p(*m6ucaDtHY;7O8HW(wPwDHZ67D>}@*I1&7NH9oJcUc#7__7i{L6rlfiOc)B2W zlE}XIuAdLJ_Snv8({41@j_EI46?)R)Tu=Jt2{KD{i+sy}Isd-AT;^qWx03$6nNPdI zifh(PNK&X}Gd^9-%e1Py+eK4nu}k@}4exc2&vGcw$&kGjlCRq7@+IKpjo6mjv?Ygc zFr8&L+cwc`&H5kF|2Ol5KMJmVcPDYzoz31CIriu8Hc52(s%hD?VUx(8zK5^bVs3p9 w?pT?(;cx1N?LGHQE>xTS5@8E5Ke1Hx#Q#74zn?J!Ml*UuNHZ{l;~qo+0Fsg3fB*mh literal 0 HcmV?d00001 diff --git a/bio/metadmg/getdamage/test/genome.fasta b/bio/metadmg/getdamage/test/genome.fasta new file mode 100644 index 00000000000..afe990a63bc --- /dev/null +++ b/bio/metadmg/getdamage/test/genome.fasta @@ -0,0 +1,4 @@ +>ref +AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT +>ref2 +aggttttataaaacaattaagtctacagagcaactacgcg diff --git a/bio/metadmg/getdamage/wrapper.py b/bio/metadmg/getdamage/wrapper.py new file mode 100644 index 00000000000..f8f561a9a5f --- /dev/null +++ b/bio/metadmg/getdamage/wrapper.py @@ -0,0 +1,32 @@ +__author__ = "Filipe G. Vieira" +__copyright__ = "Copyright 2023, Filipe G. 
Vieira" +__license__ = "MIT" + + +import tempfile +from snakemake.shell import shell + + +extra = snakemake.params.get("extra", "") +log = snakemake.log_fmt_shell(stdout=True, stderr=True) + + +ref = snakemake.input.get("ref", "") +if ref: + ref = f"--fasta {ref}" + + +with tempfile.TemporaryDirectory() as tmpdir: + shell("metaDMG-cpp getdamage --threads {snakemake.threads} {ref} {extra} --outname {tmpdir}/out {input.aln} {log}") + + if snakemake.output.get("dmg"): + assert snakemake.output.dmg.endswith(".gz"), "'dmg' file is Gzip compressed, but extension is not '.gz'" + shell("cat {tmpdir}/out.bdamage.gz > {snakemake.output.dmg}") + + if snakemake.output.get("res"): + assert snakemake.output.res.endswith(".gz"), "'res' file is Gzip compressed, but extension is not '.gz'" + shell("cat {tmpdir}/out.res.gz > {snakemake.output.res}") + + if snakemake.output.get("stats"): + assert not snakemake.output.stats.endswith(".gz"), "'stats' file is NOT Gzip compressed, but extension is '.gz'" + shell("cat {tmpdir}/out.stat > {snakemake.output.stats}") diff --git a/bio/metadmg/lca/environment.yaml b/bio/metadmg/lca/environment.yaml new file mode 100644 index 00000000000..1d9b3558d4d --- /dev/null +++ b/bio/metadmg/lca/environment.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - metadmg =0.3 diff --git a/bio/metadmg/lca/meta.yaml b/bio/metadmg/lca/meta.yaml new file mode 100644 index 00000000000..87cb037f17f --- /dev/null +++ b/bio/metadmg/lca/meta.yaml @@ -0,0 +1,20 @@ +name: metaDMG lca +url: https://github.com/metaDMG-dev/metaDMG-cpp +description: metaDMG-cpp is a fast and efficient method for estimating mutation and damage rates in ancient DNA data +authors: + - Filipe G. 
Vieira +input: + - aln: SAM/BAM/CRAM file + - names: taxonomy file "names.dmp" + - nodes: taxonomy file "nodes.dmp" + - acc2taxid: TSV with correspondence between accesions and taxa IDs +output: + - dmg: path to TSV file containing counts of mismatchs conditional on strand and cycle. + - lca: path to TSV file with LCA results. + - stat: path to TSV file with general stats. + - log: path to log file. +params: + - extra: additional program arguments. +notes: | + * Input BAM file has to be sorted by read name. + * More information about output formats in https://github.com/metaDMG-dev/metaDMG-cpp/blob/master/doc/formats.pdf diff --git a/bio/metadmg/lca/test/Snakefile b/bio/metadmg/lca/test/Snakefile new file mode 100644 index 00000000000..aa27cf87760 --- /dev/null +++ b/bio/metadmg/lca/test/Snakefile @@ -0,0 +1,18 @@ + +rule metadmg_lca: + input: + aln="{sample}.bam", + output: + res="results/lca/{sample}.out.gz", + lca="results/lca/{sample}.lca.gz", + stats="results/lca/{sample}.tsv", + log="results/lca/{sample}.log", + log: + "logs/lca/{sample}.log", + params: + extra="-simscorelow 0.95 -simscorehigh 1.0 -minmapq 30 -howmany 30 -lca_rank species", + threads: 4 + resources: + mem_mb=1024, + wrapper: + "master/bio/metadmg/lca" diff --git a/bio/metadmg/lca/test/a.bam b/bio/metadmg/lca/test/a.bam new file mode 100644 index 0000000000000000000000000000000000000000..a407ae2040d5f118d107ef42cdafa491bf9aa549 GIT binary patch literal 521 zcmb2|=3rp}f&Xj_PR>jW$qdCsUsAWECnOYb@IB%Awpq)|_wkpHn~e;gI&nxey7KIh zX}V)+8@r}RyIZ)JyC}%Ax%Od>p4tLI9S5F}a|^haL@MSev+=UAu`y&El!{FO8ZD1z zM+al?r2T$}4Fry@*L4hOcp=Knws6J72`6sU8>lZ7+90a6z<@ExTx->)17gOhZ>&E! 
ztvO@EG3Ro2s^!`!@A)!w-`@W|cdla3ajlbu`ltD74w_x>^EkdFacflb3_GVF2d&Ag zmbiF`D23F0m@a!a?@HuC--y%A4U94K?Z4RHoA;}Ey4;>khoyFleYLyvM5ibAb&0?M zW!p(*m6ucaDtHY;7O8HW(wPwDHZ67D>}@*I1&7NH9oJcUc#7__7i{L6rlfiOc)B2W zlE}XIuAdLJ_Snv8({41@j_EI46?)R)Tu=Jt2{KD{i+sy}Isd-AT;^qWx03$6nNPdI zifh(PNK&X}Gd^9-%e1Py+eK4nu}k@}4exc2&vGcw$&kGjlCRq7@+IKpjo6mjv?Ygc zFr8&L+cwc`&H5kF|2Ol5KMJmVcPDYzoz31CIriu8Hc52(s%hD?VUx(8zK5^bVs3p9 w?pT?(;cx1N?LGHQE>xTS5@8E5Ke1Hx#Q#74zn?J!Ml*UuNHZ{l;~qo+0Fsg3fB*mh literal 0 HcmV?d00001 diff --git a/bio/metadmg/lca/wrapper.py b/bio/metadmg/lca/wrapper.py new file mode 100644 index 00000000000..7ec85570fe2 --- /dev/null +++ b/bio/metadmg/lca/wrapper.py @@ -0,0 +1,31 @@ +__author__ = "Filipe G. Vieira" +__copyright__ = "Copyright 2023, Filipe G. Vieira" +__license__ = "MIT" + + +import tempfile +from snakemake.shell import shell + + +extra = snakemake.params.get("extra", "") +log = snakemake.log_fmt_shell(stdout=True, stderr=True) + + +with tempfile.TemporaryDirectory() as tmpdir: + shell("metaDMG-cpp lca -bam {input.aln} -names {input.names} -nodes {input.nodes} -acc2tax {input.acc2taxid} {extra} --outnames {tmpdir}/out {log}") + + if snakemake.output.get("dmg"): + assert snakemake.output.dmg.endswith(".gz"), "'dmg' file is Gzip compressed, but extension is not '.gz'" + shell("cat {tmpdir}/out.bdamage.gz > {snakemake.output.dmg}") + + if snakemake.output.get("lca"): + assert snakemake.output.lca.endswith(".gz"), "'lca' file is Gzip compressed, but extension is not '.gz'" + shell("cat {tmpdir}/out.lca.gz > {snakemake.output.lca}") + + if snakemake.output.get("stats"): + assert not snakemake.output.stats.endswith(".gz"), "'stats' file is NOT Gzip compressed, but extension is '.gz'" + shell("cat {tmpdir}/out.stat > {snakemake.output.stats}") + + if snakemake.output.get("log"): + assert not snakemake.output.log.endswith(".gz"), "'log' file is NOT Gzip compressed, but extension is '.gz'" + shell("cat {tmpdir}/out.log > {snakemake.output.log}") diff --git a/test.py 
b/test.py index 5fd7b236c4f..93cebebb0f5 100644 --- a/test.py +++ b/test.py @@ -139,6 +139,56 @@ def run(wrapper, cmd, check_log=None): os.chdir(origdir) +@skip_if_not_modified +def test_metadmg_getdamage(): + run( + "bio/metadmg/getdamage", + [ + "snakemake", + "--cores", + "1", + "--use-conda", + "-F", + "results/getdamage/a.out.gz", + "results/getdamage/a.dmg.gz", + "results/getdamage/a.tsv", + ], + ) + + +@skip_if_not_modified +def test_metadmg_lca(): + run( + "bio/metadmg/lca", + [ + "snakemake", + "--cores", + "1", + "--use-conda", + "-F", + "results/lca/a.out.gz", + "results/lca/a.lca.gz", + "results/lca/a.tsv", + "results/lca/a.log", + ], + ) + + +@skip_if_not_modified +def test_metadmg_compressbam(): + run( + "bio/metadmg/compressbam", + [ + "snakemake", + "--cores", + "1", + "--use-conda", + "-F", + "results/compressbam/a.bam", + ], + ) + + @skip_if_not_modified def test_galah(): run( From 9b0d2dc1d3e1a1c7e8cd58ebd572bc641a3de56c Mon Sep 17 00:00:00 2001 From: fgvieira <1151762+fgvieira@users.noreply.github.com> Date: Tue, 19 Sep 2023 18:51:49 +0200 Subject: [PATCH 2/4] Code format --- bio/metadmg/compressbam/test/Snakefile | 2 +- bio/metadmg/compressbam/wrapper.py | 6 ++++-- bio/metadmg/getdamage/wrapper.py | 16 ++++++++++++---- bio/metadmg/lca/wrapper.py | 20 +++++++++++++++----- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/bio/metadmg/compressbam/test/Snakefile b/bio/metadmg/compressbam/test/Snakefile index de64b3a5a90..08e1095ac4b 100644 --- a/bio/metadmg/compressbam/test/Snakefile +++ b/bio/metadmg/compressbam/test/Snakefile @@ -7,7 +7,7 @@ rule compressbam: log: "logs/compressbam/{sample}.log", params: - extra="", # optional + extra="", # optional threads: 4 resources: mem_mb=1024, diff --git a/bio/metadmg/compressbam/wrapper.py b/bio/metadmg/compressbam/wrapper.py index 1c787d3c016..45465f23194 100644 --- a/bio/metadmg/compressbam/wrapper.py +++ b/bio/metadmg/compressbam/wrapper.py @@ -16,7 +16,9 @@ ref = f"--ref {ref}" -out_fmt 
= Path(snakemake.input.aln).suffix +out_fmt = Path(snakemake.input.aln).suffix.lstrip(".").lower() -shell("compressbam --threads {snakemake.threads} -hts {snakemake.input.aln} {ref} {extra} -type {out_fmt} -out {snakemake.output[0]} {log}") +shell( + "compressbam -threads {snakemake.threads} -hts {snakemake.input.aln} {ref} {extra} -type {out_fmt} -out {snakemake.output[0]} {log}" +) diff --git a/bio/metadmg/getdamage/wrapper.py b/bio/metadmg/getdamage/wrapper.py index f8f561a9a5f..760cfe493a8 100644 --- a/bio/metadmg/getdamage/wrapper.py +++ b/bio/metadmg/getdamage/wrapper.py @@ -17,16 +17,24 @@ with tempfile.TemporaryDirectory() as tmpdir: - shell("metaDMG-cpp getdamage --threads {snakemake.threads} {ref} {extra} --outname {tmpdir}/out {input.aln} {log}") + shell( + "metaDMG-cpp getdamage --threads {snakemake.threads} {ref} {extra} --outname {tmpdir}/out {snakemake.input.aln} {log}" + ) if snakemake.output.get("dmg"): - assert snakemake.output.dmg.endswith(".gz"), "'dmg' file is Gzip compressed, but extension is not '.gz'" + assert snakemake.output.dmg.endswith( + ".gz" + ), "'dmg' file is Gzip compressed, but extension is not '.gz'" shell("cat {tmpdir}/out.bdamage.gz > {snakemake.output.dmg}") if snakemake.output.get("res"): - assert snakemake.output.res.endswith(".gz"), "'res' file is Gzip compressed, but extension is not '.gz'" + assert snakemake.output.res.endswith( + ".gz" + ), "'res' file is Gzip compressed, but extension is not '.gz'" shell("cat {tmpdir}/out.res.gz > {snakemake.output.res}") if snakemake.output.get("stats"): - assert not snakemake.output.stats.endswith(".gz"), "'stats' file is NOT Gzip compressed, but extension is '.gz'" + assert not snakemake.output.stats.endswith( + ".gz" + ), "'stats' file is NOT Gzip compressed, but extension is '.gz'" shell("cat {tmpdir}/out.stat > {snakemake.output.stats}") diff --git a/bio/metadmg/lca/wrapper.py b/bio/metadmg/lca/wrapper.py index 7ec85570fe2..48b14aa5179 100644 --- a/bio/metadmg/lca/wrapper.py +++ 
b/bio/metadmg/lca/wrapper.py @@ -12,20 +12,30 @@ with tempfile.TemporaryDirectory() as tmpdir: - shell("metaDMG-cpp lca -bam {input.aln} -names {input.names} -nodes {input.nodes} -acc2tax {input.acc2taxid} {extra} --outnames {tmpdir}/out {log}") + shell( + "metaDMG-cpp lca -nthreads {snakemake.threads} -bam {snakemake.input.aln} -names {snakemake.input.names} -nodes {snakemake.input.nodes} -acc2tax {snakemake.input.acc2taxid} -tempfolder {tmpdir} {extra} --outnames {tmpdir}/out {log}" + ) if snakemake.output.get("dmg"): - assert snakemake.output.dmg.endswith(".gz"), "'dmg' file is Gzip compressed, but extension is not '.gz'" + assert snakemake.output.dmg.endswith( + ".gz" + ), "'dmg' file is Gzip compressed, but extension is not '.gz'" shell("cat {tmpdir}/out.bdamage.gz > {snakemake.output.dmg}") if snakemake.output.get("lca"): - assert snakemake.output.lca.endswith(".gz"), "'lca' file is Gzip compressed, but extension is not '.gz'" + assert snakemake.output.lca.endswith( + ".gz" + ), "'lca' file is Gzip compressed, but extension is not '.gz'" shell("cat {tmpdir}/out.lca.gz > {snakemake.output.lca}") if snakemake.output.get("stats"): - assert not snakemake.output.stats.endswith(".gz"), "'stats' file is NOT Gzip compressed, but extension is '.gz'" + assert not snakemake.output.stats.endswith( + ".gz" + ), "'stats' file is NOT Gzip compressed, but extension is '.gz'" shell("cat {tmpdir}/out.stat > {snakemake.output.stats}") if snakemake.output.get("log"): - assert not snakemake.output.log.endswith(".gz"), "'log' file is NOT Gzip compressed, but extension is '.gz'" + assert not snakemake.output.log.endswith( + ".gz" + ), "'log' file is NOT Gzip compressed, but extension is '.gz'" shell("cat {tmpdir}/out.log > {snakemake.output.log}") From 34cc20f1e89f3bd361a152fd9ebe3c498c2802e1 Mon Sep 17 00:00:00 2001 From: fgvieira <1151762+fgvieira@users.noreply.github.com> Date: Fri, 22 Sep 2023 08:12:28 +0200 Subject: [PATCH 3/4] Small fixes --- 
bio/metadmg/compressbam/test/Snakefile | 1 + bio/metadmg/compressbam/test/a.bam | Bin 521 -> 4894 bytes bio/metadmg/compressbam/test/genome.fasta | 101 +++++++++++++++++++++ bio/metadmg/compressbam/wrapper.py | 5 +- bio/metadmg/getdamage/test/Snakefile | 2 +- bio/metadmg/getdamage/test/a.bam | Bin 521 -> 4896 bytes bio/metadmg/getdamage/test/genome.fasta | 105 +++++++++++++++++++++- bio/metadmg/getdamage/wrapper.py | 4 +- bio/metadmg/lca/test/Snakefile | 5 +- bio/metadmg/lca/test/a.bam | Bin 521 -> 4896 bytes bio/metadmg/lca/test/acc2taxid.tsv | 1 + bio/metadmg/lca/test/names.dmp.gz | Bin 0 -> 969 bytes bio/metadmg/lca/test/nodes.dmp.gz | Bin 0 -> 460 bytes bio/metadmg/lca/wrapper.py | 10 +-- 14 files changed, 219 insertions(+), 15 deletions(-) create mode 100644 bio/metadmg/compressbam/test/genome.fasta create mode 100644 bio/metadmg/lca/test/acc2taxid.tsv create mode 100644 bio/metadmg/lca/test/names.dmp.gz create mode 100644 bio/metadmg/lca/test/nodes.dmp.gz diff --git a/bio/metadmg/compressbam/test/Snakefile b/bio/metadmg/compressbam/test/Snakefile index 08e1095ac4b..5e7ba6e0a59 100644 --- a/bio/metadmg/compressbam/test/Snakefile +++ b/bio/metadmg/compressbam/test/Snakefile @@ -2,6 +2,7 @@ rule compressbam: input: aln="{sample}.bam", + ref="genome.fasta", output: "results/compressbam/{sample}.bam", log: diff --git a/bio/metadmg/compressbam/test/a.bam b/bio/metadmg/compressbam/test/a.bam index a407ae2040d5f118d107ef42cdafa491bf9aa549..603edf6d5b2761108a6c02c606f944120f689d9a 100644 GIT binary patch literal 4894 zcmV+(6XEP1iwFb&00000{{{d;LjnL*3w>Byj9yh0UR1?QHBjwv3u&21ky4pq-!Ib( zo!)EOQWz9SD|MzNfWXwWAf#1eAjSwj06_yyqxfQjm?|Vhf<#2cOA4{U)M!fN3AOq{ zl{5$mxb`_`ue101r#5K=-}(N1S$kc-RTG=fSac4+$_;CVKD%XHj=7=9tHyWk-o0nn ztus@1-ZXdRnJ z6Ve&_=A5X$xnMP&Mc2Qx(M$SfWOYvAd|uPb`gc-!;rcaW^-ag zjH&K-*5_~QS5M$aWqdTzw!hWNNv{NF1g%BKNVYP){W^vYCdkb24i}LF46jVy1^+0o zIKyb8GDDc?P2dcxtj!FGr;_ZXNH9j{a|9RgPt6d<^yg4g;bECQMASCA%&<{Lj24)h0E;I{+te(n%Gy@Dp@e+g$nny!oII@^i9{s&$ 
z7>tof3^bVaW)?=DEzv2tCnF<}CmVcb&EwWe>4NTkJv#4ZVFcPGly$df#6I&!J&?80 z{iEsgsr8tJI0KJIYbb*TVg7+(*5@FFw=%AN7mS~j$TA~26R%=uFMR%-wWA7Nc)m-( zr^{7)T2UoeKoweOBQrd@ zhmDMp@+0MBZE-<23nSkpl@7-uyJ1jdD|BiMvXT)~r>7~hjfv?{J!4&}`*ez-Um+9w8|W_BuE?@_Gz=}!!~1HAG0M+@g92s#R0xGG0-3Awm8f9Ah|uFk9{|8aJc0*#f;(mi4UjRAlb`dn#1GgUGTD) z+ZjXKqCdq~@!aZ|Ooi=ZPT~xzIYW7>K)OjE#d2)R*T>-3b<9a@oH5?}xzZaJbBWQ% z9R42Ny1FgSVd$v+yI_{bvd7lInofBg%@u^>=fLlzDq!1~Ml;R`k{B4-*jDp3a6s$3 zPvL9rBAqmKb<};P+hVLU2AWZm7b;8xwT~qR`f`p+4BIGm9AFeZj4{~MNzSn7n^eVc zMDT_)TjkqZN(CSTujeP0Wo9-SNogTU8o-xGu&vET?fvsEr|6+T0@40hibY^69_sBhC z(^GehOr?JZ7FDNP6t_F=vh7j_0N|tex6nf2@RbGDYM^j*Wsh~*Cp=SPt>y{O7Fefs z!tg|awHhb9TI|tjn{ce$x79S^->b?!8Z8t4SnxUxvs$myF5$WIEUjhHZ-hsG^Qi(S zl@ZR~HB#bPTZG|nU0UItq6kxO+5#sX5vHFTuJBGhgbN<5?2t7>c;M+#jwhuMPCjKA z-spp1uGi(gvMLCld#t<{X@T&w7nam`JAK37&J7$7%{sqWQSO`7KKO_%&q*2|YKKP>>D!kL_;I3oKE4)+Z zKn^>GH(D6%ziUyAw@MtG{g3iqq`$#?W2;I$t8Oso=UW-xu7Bd=6C7{WIVi4Wc%!Vq zSN3h>c(eY2Yql}G^~1-;S8%+s=fc;w*LbU!!SSP;8Q#p&Fn`|+$D4ikU9zsmTSW|p zPVHlOGh@JofBj5>lllc`9ooY2#!sGk_=*~Dl`eSt+VUMq--7K6mT|nf@8jPS3~xFC zSoqsAUP!@$mo~Tx?{q8p@4hllNv(oo`!DYCUZa8q>n}lIKjR5m3fbJCRp~}%L<&-CAj6)G9F1&f+tqrSlo-0B>3SE zT#0A>2o5}NOFXMaaLL1k-FmGE4xI`mo)sdv=BYEb*-Oz-xP!mv~lr;Oj5nSmIgRfyqmk)_AMvz}$DIN<8a0@YQ*nN<6DK z@asny-fA{*@ShVE-YGS3*4+zgywzvm_Nj%{c{)`Fo;+_|fs+;k@9ey~#Iphe4}8Oy zc-CFu^*Ku!-k$ScYfX)}8Vj8B(2@%8lodGo?b6Pqr@&>uTgvb@-=1;*E`~RA3oJTw zyu`C&0`DG~DR9zB;N%-S7~ZHOF#m}P&zcBq{OLr6cS;D19K5;2v;F~l@2&RDss}85 zzFI$P9kBFx=_jOcz&k@5N`BTgVC!4eI$6zt8#Zj_c*B4DL#6+dash`If4;&yy#nq# zxUIyqN&)*Ot97zA0mtW;?@o#Y?Eg>cKcqvzvnNVFB=rGyzOPF>YYs5EV!XmTr2&>7 z9j)+AUx2^QDeXzB0<8W-X;0D;;KCO_UE*0mfKz7{anb7r@W}jS1x{)KeDb2P63-d| z%rjFZo|OUk+D}S8=>hPE;nL2e0>IRf!0~3C7o#ijJn_T#@0wXk3o zcvml$UpHOh-5d?~e{pApchfU$`=u)JJTt?OFTJ_KyGa>#oY)Be17-e8A8;c803VA8 z1ONa4009360763o0ILU`Sk3pMb;#5JE&CeguM6Z~<{y!G#={HJ6b;r$bRso#BFhTa?OYj1@2UiZ|+ zW5$IaS&t4Zi;%o5N=%CiQC#9Y&B~IjZzMym)BKK*)=SGfr+2^o&HQ`!zx9oKcfb4Q zcdU!$!a6%$SQqL(gZs-1D}5PW;ePe}&MU82k9N=>O%en8$YY`J`_{w5r=cg+dgWB* 
z>Jng-%oFvcF5bmt9_UxaDey*d`myHpOp6mSaAGW!BIec+WaK)b2YuoX@fd^kR~qYw zTIzYhgf$eg*N9WtL>sQZ$} zy(e25Z)+NEOXi<6(C~yGhF)a-5K^GkQdHHnT<7^}mEjBx!|tE6VA&y{g9~+U(LcTb z1{?J6X!LI?^j4h&>oW!cF%J$Y^R0gadb`YRCH~cvtQ>dcR%u1oltwp`L~wcImV!7- z96pbdMAek}ya7IoJoVAcPHe?tAv#X(p71vW{-p_gZn6#k!Vvy2U`)n=<$VCE%sL|^ z!xh5Vaovm{N)f`yM$|=w+O#;bYYH;}ejFdy@ZsC+SW%hR#q^R=64yV$FXH;o;$#*} zmPnzBXs{fouRm#mUz{`S?{i;z8wM@d zkRNPU#k$CdnCuexohCpJ`XBiql2HVHQyuw?;JO;y7&J%7s&ZC# ze?Jc3y&60p1^%avIcY`TWL|~vMHqXvdA%O|Ugj0xc8m|rVV~?C_J9b2(V!L)By&Z9o$+xTexo_?R>WPC1rPj4fLtCZy|-lI7PNE;(nfaH zC-#h=@nF&8sO}NV&o%U@75=FKx&kg_Y~5C3xrV!+|5K*o4Caq9!EVaqmlWd3##DGF zc}Flcc%mLs8=TM;Qsk^E8|PVxow`7>0ivlb#i;3_k^D=om>X;nOg=mcxTtN>e*BaU z&^+IA=*U8k>Fp~6nw0QDkWRE|QYEs9gYg=nbycL(+y#?_lRk={ZLyD=-u|Jn|LCCq z#zKfFVu7qT7Fk)@h3ymsMmR)DN-Aov?FwcI7dclN?R+2pj!RV`x%4||b-2BMYFh8K zs?g_6wkh^_><89+Y7ujt3B_rNP@WQ-lqef+x_7vVDD7&<9o*j1-2U21*@>yba_AB) zw!W$=>@KU`sQn=A7*KnRKY#Dv+C0g9=EY&%+Au8(RAMr92(ojGG6ym-t*4*R)8d+| z`3@ZEte#KfS^Y*P$t`75!KiY+>CP$Z9Dh2@+`F2^e_FEotcisTMd*drBal_29}FeS z$!w@yIICbdh1nIFN9~I6Cf2k1o+ZaR_Vdec9ogYEvxPs-Yd0H^|F+0|%2bL&Aypas zghK>Qbt zcyhRnw{JSflE+d;mZzHc7H_E)G|dsV$?fE_dz(ARHuf;2vCcVd-@$l%b@BSlVgJQz zAQ<=k&EoaiNvL@Y8WKh>UYlBqTewZNl=}j7&+A%R6`5;OWm$2aB9hr_XF$CDVLO3F zIDgQfpF7-2-SmtkkPvFg>t`fa!%S9bVbf~B8m0-k7`a6Hy~clh*tk~XveJu15Xykp z%gUXQqs*=Bt=ZVSlh{Wf7^Rt?X!y^!G((M+iNzvh)(y1-yB_>rV7rV^2iTwOfHjvz z#=r|f6S=()EbSRwe_U|i+Jo1$rjslPVzyb+?NRoSHQl%Ypy@py9TMbFu*lQ}Gh4RX%U z*|0@!6YRff=yzJsLsRtt6Bb9Ibzim3LTv=?yvj>dm8*|1((}~_uLiJ& zOm&5flU9flyX-0by%(^O=L{@CxagWxIvnNy0fEg$1kE1+03VA81ONa4009360763o Q02=@U00000000000AuZxg8%>k delta 502 zcmVvnonWf zGBM>s2pEAy47ePE+&Mj6ta2-h6LSl4QaOWt;cPA@pb;!U3{=jb3#3^gY$FB@1_lNI zXu%p?WB>pFABzYC000000RIL6LPG)ohyjg~zfQw25XP^6EkRU(=oJG7p{S9N$k=}{ zC!rFs6)K=G0a7%Y%RcT%SMmKi;1DgO?t> zvcnp?7WF=V(&Q|RUF&oZz#~19c6rle4tNbQRH7xZl!;(kMwVrso3<<)aKm=O$1J78 zlr5&{1~;;lW=iz=ajI3xK+cS3(~umcEpknF`a}EE zryS{viy|+cndORBb9}6jVjz76Gs}Ao0;-FPL@6w8qeOScuOV diff --git a/bio/metadmg/compressbam/test/genome.fasta 
b/bio/metadmg/compressbam/test/genome.fasta new file mode 100644 index 00000000000..157f91330cd --- /dev/null +++ b/bio/metadmg/compressbam/test/genome.fasta @@ -0,0 +1,101 @@ +>NC_023100.1 partial +GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTTTGGGGG +GTGTGCACGCGATAGCATTGCGAAACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC +CTGCCCCATCTCATTATTTATCGCACCTACGTTCAATATTACAGGCGAGCATANNTACTAAAGTGTGTTA +ATTAATTAATGCTTGTAGGACATAATAATAACNATTNAATGNCTGCACAGCCGCTTTCCACACAGACATC +ACAACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA +AACCCCAAAAACAAAGAACCCTAACACCAGCCTAGCCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC +TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATTAATA +CAACCCCCGCCCATNCTACCCANNACACACACACCGCTGCTAACTCCATACCCCGAACCAACCAAACCCC +AAAGACACCCCCCANAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC +ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA +GCATCCCCATTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC +AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATAAGCCTTTAGCAATAA +ACGAAAGTTTAACTAAGCTATACTAACTCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA +TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCANNNNNNNNNNAATAAAGCTAAAACT +CACCTGAGTTGTAAAAAACTCCAGTTGATACAAAATAAACTACGAAAGTGGCTTTAACACATCTGAATAC +ACAATAGCTAAGGCCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC +AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC +CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCCCTTGCTCAGCCTATATA +CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC +AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAACCCTTAT +GAAATTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAATAGAGTGCTTAGTTGAACAGGGCCCTGA +AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA +TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA +GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA +GCCCCAAACCCACTCCACCTTACTACCANACAACCTTAACCAAACCATTTACCCAAATAAAGTATAGGCG 
+ATAGAAATTGTAACCCGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA +ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAACC +AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA +AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAANCTANCGAGCCTGGTGATAGCTGGTTNTCCAAGAT +AGAATCTTAGTTCAACTTTAAATTTACCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC +CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG +TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC +ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCTTATAGAAGAACTAATGTTAGTATAAG +TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC +AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA +AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC +ATTACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA +AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT +TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA +TGGAGCTTTAATTTATTAATGCAAACAATACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT +AAAAATTTCGGTTGGGGCGACCTCGGAGCACAGCCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG +TCAAAGCGAACTACCATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA +GCGCAATCCTATTCCAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG +ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG +AGTAATCCAGGTCGGTTTCTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTA +CTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTNNNNNNNNNNNNNNCCAAGAA +CAGGNTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAATCAGAGGTTCAACT +CCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGCACCCATTCTAATCGCAATGGCAT +TCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCCC +CTACGGGCTACTACAGCCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCTCCACA +TCNACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACTCCCC +TCCCCATACCCAACCCCCTGGTCAACCTCAANCTAGGCCTCCTATTTATCCTAGCCACCTCTAGCCTAGC 
+CGTTTACTCAATCCTCTGATCAGGGTGAGCGTCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCA +GTGGCCCAAACAATCTCATATGAAGTCACCCTAGCTATCATCCTACTATCAACATTACTAATAAGTGGCT +CCTTTAACCTCTCCACCCTTATCACAACACAAGAGCACCTCTGATTACTCCTGCCATCATGACCCTTGGC +CATAATATGATTTATCTCCACACTAGCAGAGACCAACCGNANCCCCTTCGACCTTGCNGAAGGGGAGTCC +GAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGCCCTATTCTTTATAGCCGAATACA +CAAACATCATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATAACGCACTCTCCCC +TGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTNACCTCCCTATTCTTATGAATTCGAACA +GCATACCCCCGATTCCGCTACGACCAACTCATNCACCTNCTATGAAAAAACTTCCTACCACTCACCCTAG +CATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATG +TCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGTTTAAATCCCCTTATTTCTAGGACTATGAG +AATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGCTACCTATCACACCCCATCCTAAAGTAAGGTCA +GCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATATCCTTCCCGTACTAATTAATCCCCTG +GCCCAACCCGNCATNNACTCTACCATCTTTACAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTTT +TTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTCG +TTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATC +CTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAACACTANNNATNNNNNNNNGTCATTAATAA +TCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGGTTACCCAAGG +CACCCCTCTGACATCCGGCCTGCTACTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCAA +ATTTCCCCCTCATTAAACGTAAGCCTTCTCCTCACTCTTTCAATCTTATCCATCATGGCAGGCAGTTGAG +GTGGACTAAACCAAACCCAACTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAAT +AGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATNCTAACTACTACC +GCATTCCTACTACTCAACTTAAACTCCAGCACCACAACCCTACTACTATCTCGCACCTGAAACAAACTAA +CATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTT +GCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCACC +ATCACCCTCCTTAACCTCTACTTCTACCTGCGCCTAATCTACTCCACCTCAATCACACTACTCCCTATAT +CTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATCGC +CCTTACCACACTGCTCCTACCTATCTCCCCTTTTATACTNNNNNNNNNNNAGAAATTTAGGTTAAATACA 
+GACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAAGGACTGCAAAAT +CCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTAGACCAATGGGAC +TTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCG +CCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAA +AATCACCTCAGAGCTGGTAAAAAGAGGCTTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCAG +CCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGGA +ACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCG +AACTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATT +TGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAATA +ATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCNCCCTCCCTCC +TACTCCTGCTTGCATCTGCTATAGTGGAGGCCGGCGCAGGGACAGGTTGAACAGTCTACCCTCCCTTAGC +AGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTGCACCTAGCAGNTATTTCC +TCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAACCCCCTGCCATGACCCAATACC +AAACGCCCCTTTTCGTCTGATCCGTCCTAATCACAGCAGTCTTGCTTCTCCTATCTCTCCCAGTCCTGGC +CGCTGGCATCACTATACTACTAACAGACCGNAACCTCAACACCACCTTCTTCGACCCNGCCGGAGGAGGA +GACCCCATTCTATACCAACACCTATTCTGATTCTTCGGTCACCCTGAAGTTTATATTCTCATCCTACCAG +GCTTCGGAATAATCTCTCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGTAT +GGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGA +ATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCACCGGCGTCAAAG +TATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTGCAGTGCTCTGAGCCCTAGG +ATTTATTTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGTA diff --git a/bio/metadmg/compressbam/wrapper.py b/bio/metadmg/compressbam/wrapper.py index 45465f23194..b35329493bd 100644 --- a/bio/metadmg/compressbam/wrapper.py +++ b/bio/metadmg/compressbam/wrapper.py @@ -4,6 +4,7 @@ import tempfile +from pathlib import Path from snakemake.shell import shell @@ -13,12 +14,12 @@ ref = snakemake.input.get("ref", "") if ref: - ref = f"--ref {ref}" + ref = f"-ref {ref}" out_fmt = Path(snakemake.input.aln).suffix.lstrip(".").lower() shell( - 
"compressbam -threads {snakemake.threads} -hts {snakemake.input.aln} {ref} {extra} -type {out_fmt} -out {snakemake.output[0]} {log}" + "compressbam -@ {snakemake.threads} -hts {snakemake.input.aln} {ref} {extra} -type {out_fmt} -out {snakemake.output[0]} {log}" ) diff --git a/bio/metadmg/getdamage/test/Snakefile b/bio/metadmg/getdamage/test/Snakefile index b59c82f1bce..51e7751cd3d 100644 --- a/bio/metadmg/getdamage/test/Snakefile +++ b/bio/metadmg/getdamage/test/Snakefile @@ -10,7 +10,7 @@ rule metadmg_getdamage: log: "logs/getdamage/{sample}.log", params: - extra="--minlength 30 --printlength 30 --runmode 1", + extra="-l 30 -p 30 -r 1", threads: 4 resources: mem_mb=1024, diff --git a/bio/metadmg/getdamage/test/a.bam b/bio/metadmg/getdamage/test/a.bam index a407ae2040d5f118d107ef42cdafa491bf9aa549..01219f016b7f05f3f12a79395951587c31bd700a 100644 GIT binary patch literal 4896 zcmV+*6W{C~iwFb&00000{{{d;LjnLv3w>B^te0gOzofDqYD#hPWezw?3?$F-et$W< z$dUI3h+-jwqysW71qvhysx@6}X&=;CQ^v}^#LPzN=5ppTD|;Ebn2lz`X8%%%C?|;8s_x1X_c4FOmuM7h$UA=thQ|rg&m>b%7-S`*w@7(*P z-IKd^PG7q5`k{^M$Ja00CX|*!jLD(3NOM}K>YEXozS&rPlTvC=-<++!si^wjxln!+ z(i!^ZoT$FJU^SgZ*T1vTOZsMHbxz@YUen9^cT#!b`ZZ(qO~>f|$A0M>oyGe4oZbsF zrn=u*pTDhIHh~|N@zF%v{#Gj|y%L-ev=$vB*~;|x>liwiATz@|Ttp5qyfS$g{G+_$ z45N+83}K=-fitYKHZvriO0tt8!5E#-5nRAOHA5KFpF>H7hh_E`8c_$wcR_~9Hg(2Z>G@rdVe>8qpI^#>H9T64&XJ5du!a|!{1lHNxQ`(NuhTKm z7o%Kbwr zK~}daN~_c$gU(@;GcL#}7ROg_mG{Og^^%RW! zwoU|$K%PI8k~ZJPiWt3)gAA7$m`Mb>&=d@^dMX3b3@{YMOAt0_9u0}%$YM@;^!J)! 
zFh(LV&|ub^DHwgWM5pAQjEq2@Z19;ik6SCH3%d99=)9YP5oniC*4>^F`^+EpK-Nb0 zkEYM3)?*gp3_KpKp$r;?`3HtspMw5xommQY@A196>58^BR~7 zo#yX?g#mkp%s}_>qPB;`5-Yr71!0O@RAj4AJx>Uw&Uc>pVmIDlJ zoXVYn6<8b;KPlRr&SA8Sao{faT0T&`U|g4e+A*+z5+fpmuYn6t++w!G_B7TEDKJe_ zHPsq30b4_A;YHtnV4M%Efvtt0mqQ=ab_zysb^IUi2Ou_UuyFdqks zTTE#YD<5M7ET)b!jKof;wy8`*A>}chI&ELuLrQ$BKn#)EH^CS#4fk}HG(>PlR7GbX zQkTmZ=pRPWT}-Ol%pNfwQ&sSUDZ9QHepj;9Yd!?}AS1 z`%W5Di7I=kmSKb`<`;Zw#ThXM@-ZoJkgDGn%dstAAA?`lF(b5wCp`-Tif>|ER9$N!zI^}gVR}hY$1HY51fNf(M%{U`SVqj!rTg}(N0j=*o zg|D@Xbkfw-QTLf{i?PlaXhuz5s4xxGK9(5h%Q-4BY@^h1fKl`?#$Zz?Im4oFQWe7y z!5hwSsrSuRS6FK-V_;|8XMXQ68SvZU&_MkdcEDIl@EjXf4Xs%|zHf5ZoqP7&v2SR@ z`u@KgZBhE6Woro=`Qm4H-Ze71edNG_F~s=KaqV(}4IBRdV*B>&y>n!A_sHm;kppAf zCwGlZrhlg&27nLZ-$KBIW7iZ|D_p{fr9IXOmhe=GwL&F4U0|I+3BwZw)(VsGabPO&0}DA|X8dn~xVbNrQ0VEh8nK1wk18 z*1QVuWI&iaV+)+bKiK}vaD{i0A6$BWWrr;E!F^ATay-dFrhsH}LM!-KbBSj`1~$%HSmUh>1JmE0Eb%P9z*lFiE%7Y5z^@--cq_EPk$+57 zcqgyG1$WJ=@m5rUJ0@pW=jo&rc>Lm(1x^AAytU){63?;;-1iM%;#n+#*QPCGcze!& zt}r#;3M2552NqO#Cx^g^ZdY{1;}$4flR9Psw>-33nK z2Aq9;8^aq(17S-`&-|S*n29&sOVafdUquD*c3H z33zL0b;-|S1Z;StS|>{ouzB@5jyL=_-&guS$qjI9{%0z@6B*#1BU?&5OABytW3^5e z6yVg%^4&>BfJ6T-{fEQ@c=~kdha?%mj(2s5XQ2Q#E*Y=zP9A{8Cq^s069wRJ(@J}i z6adRUTH2EY0J!Y=Pn38b`{CUAMO^g5K0GvYQGwHl4~`itBdo~&<(HLRM@#6wc*&R(Higk?e{DCcQ3|j`1AWk$1noo`_5f@)K|*j? 
z@#DVuE`2}oQ3Ap`>PBM`KL3y8}KE^vs%ol7p9P_n#om&cx-9+%ygJl)eXmcH-R zd*A!s>o?Kna^SX~ewIwo(d1+B{;9sy-@fLD?rZ$@*TM&1aqCNZVdzHa2M6#RvopKO zi>jKI>pWkrGLqqB@O7Hs6W%!wmK?pZymxy4Ti?jP`|z7ze{lahZ+shFEEnkPba8N@ z?z08D_`(9EcOSfWareQ~FP-0e^;PssjsE_mMgLI~dY^}ZaM43R&nU(er-T)R(9FTi zvL6jc?(t=KGwjKQx?9*!FYYeT2KG-i>=zDO*i!>+&fO^XBJ?uAE;33PqthG{i>}hb zVnY!JH{(SY>b}Gaua|g#r13t}NfK(r8~QR9=m_wZMF}CUFe4?&)2uYdSYh?{!uku1 z^{)pl))Ny}5pc<4^i_DqT3Z!mj)QH=NLpf?r^6E>NPF_QCn5f+L4Wx&A2ULm+(hjA5pv;~!pc_r~*26%jfayEjFyAVJp8mBj!%WpInanN#c zOk4sll7WjJspKLA;L}xQuPmovCGc7eWdP8*I1|CbHAA0Vm&%`M=pP!RIici{D{9Rl z+mch=h;-hvQSn}S{-~m85dTUeeh#(4e$k9LhTK8l2gFdtc0s5Epe;xDsiEp#BbPnJ zJV1d+C*e&BBs|I8P}B-k6`5l(l_?;Wm}b`69t_!Cc;i}96YMuN=)+F1KVbqLb0MK6 zJyF4?`L@cJbbV`ZxSNpoq)Jc7o8*4?V0dzS9MDH`o!l9wt;#T^G7YBrb(l?Z&z0gY zJ+W838^qBV#GoV6-yq(Wkzu9_5C>;a9!A8I>pkkX8u1r9h+i@wmTm~3BlK7yF0n;z zLe?0st0JA|4y7eYhDhJ6&5xcQ)RP}SzGk@4gqg5|Bak6Sw{{t>L812c1N##V_9LAp zV{%(kdNPdN+HEn+Fac+q0%S(4YLMF!;n@I{-jNpn&}e_yNeh#8kur*S;MdkAD=Vw8 z?1IuY#k6F!VjTJx@j&doiEO1Ha)TY?V3C?V4(J4IwP@$_+NZFHF5-U}i&y#pozwwjgP68jsNg~9i zAW|MJ2Y9U=W9h#%i~n>g_){hpekemXL{H$j@)S^)SdG%fR$qb>shcX$)z+#lG&t zd&d2D8b9k4xH0aLA4M=hskmp9P#AV`cajeL${@Lplr;iH0u4Hjj~h9f9V@Aw*UHqYQ(XV1G`RjZP~_O>i7%B)4Myfy zQy)(>!FG-@+mQ&yM$hwVJF}0%o|^x2V+c{J zRVKxa1owRD){1+TgOlg^dQGQOdoRTm(Rp9)^&HKwHI38B@Y0X`FaWz)m;QF&fY%Yi zr96pP_O!en(fXf8{=E+JyGG=3$Rntl_amhC$`!>`Wjob6%L_1tRY5FBGw6KK`|P5s z7%Lfh#ME_iGV)mBogbc@P)a}if|87-$CtwG-Fo$O{AN4(R>#hK#za4MMeGIW9aU4? 
zuD)5UZd}gzG`x1QGD`cllRZcKd(G|mu<`i_ebT@!mOHGSvN z>|(IrX8vy){=JSJ2u<+8YQ|BB9;z{=XjaGNmDMpxD|IZh8OT@ch7xEfA)6<+;Z2;( zpTEuA`F!3E?=q3I_$4=qZUdS4yl;=wBN2}^ofn7OfA0DLm!4bO|0{EEu`aTTIIP5@ zGAyVUJA$8y{RZNO!z&#Q2DRpHAfiCXTCI1_jf{;TA= zw>CP4?m6Jvf7jD&GXL9AtB;$ohtgMjs-LSbtLnLHYacjfky(2*P~5%r=q-)@4b)6m zMm^#H-r-y^PJm;S36`L*(bxR7%v7s+Any=O?YFEReA8%qW#5o_==9|oE zVowi-XT9eM&v!TW-rC;0)e1B+>v=t&^rU{zeDZ%`gwx&49{>O!iwFb&00000{{{d; SLjnLB00RI3000000000FdWmfS literal 521 zcmb2|=3rp}f&Xj_PR>jW$qdCsUsAWECnOYb@IB%Awpq)|_wkpHn~e;gI&nxey7KIh zX}V)+8@r}RyIZ)JyC}%Ax%Od>p4tLI9S5F}a|^haL@MSev+=UAu`y&El!{FO8ZD1z zM+al?r2T$}4Fry@*L4hOcp=Knws6J72`6sU8>lZ7+90a6z<@ExTx->)17gOhZ>&E! ztvO@EG3Ro2s^!`!@A)!w-`@W|cdla3ajlbu`ltD74w_x>^EkdFacflb3_GVF2d&Ag zmbiF`D23F0m@a!a?@HuC--y%A4U94K?Z4RHoA;}Ey4;>khoyFleYLyvM5ibAb&0?M zW!p(*m6ucaDtHY;7O8HW(wPwDHZ67D>}@*I1&7NH9oJcUc#7__7i{L6rlfiOc)B2W zlE}XIuAdLJ_Snv8({41@j_EI46?)R)Tu=Jt2{KD{i+sy}Isd-AT;^qWx03$6nNPdI zifh(PNK&X}Gd^9-%e1Py+eK4nu}k@}4exc2&vGcw$&kGjlCRq7@+IKpjo6mjv?Ygc zFr8&L+cwc`&H5kF|2Ol5KMJmVcPDYzoz31CIriu8Hc52(s%hD?VUx(8zK5^bVs3p9 w?pT?(;cx1N?LGHQE>xTS5@8E5Ke1Hx#Q#74zn?J!Ml*UuNHZ{l;~qo+0Fsg3fB*mh diff --git a/bio/metadmg/getdamage/test/genome.fasta b/bio/metadmg/getdamage/test/genome.fasta index afe990a63bc..157f91330cd 100644 --- a/bio/metadmg/getdamage/test/genome.fasta +++ b/bio/metadmg/getdamage/test/genome.fasta @@ -1,4 +1,101 @@ ->ref -AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT ->ref2 -aggttttataaaacaattaagtctacagagcaactacgcg +>NC_023100.1 partial +GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTTTGGGGG +GTGTGCACGCGATAGCATTGCGAAACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC +CTGCCCCATCTCATTATTTATCGCACCTACGTTCAATATTACAGGCGAGCATANNTACTAAAGTGTGTTA +ATTAATTAATGCTTGTAGGACATAATAATAACNATTNAATGNCTGCACAGCCGCTTTCCACACAGACATC +ACAACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA +AACCCCAAAAACAAAGAACCCTAACACCAGCCTAGCCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC +TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATTAATA 
+CAACCCCCGCCCATNCTACCCANNACACACACACCGCTGCTAACTCCATACCCCGAACCAACCAAACCCC +AAAGACACCCCCCANAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC +ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA +GCATCCCCATTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC +AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATAAGCCTTTAGCAATAA +ACGAAAGTTTAACTAAGCTATACTAACTCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA +TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCANNNNNNNNNNAATAAAGCTAAAACT +CACCTGAGTTGTAAAAAACTCCAGTTGATACAAAATAAACTACGAAAGTGGCTTTAACACATCTGAATAC +ACAATAGCTAAGGCCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC +AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC +CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCCCTTGCTCAGCCTATATA +CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC +AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAACCCTTAT +GAAATTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAATAGAGTGCTTAGTTGAACAGGGCCCTGA +AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA +TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA +GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA +GCCCCAAACCCACTCCACCTTACTACCANACAACCTTAACCAAACCATTTACCCAAATAAAGTATAGGCG +ATAGAAATTGTAACCCGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA +ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAACC +AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA +AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAANCTANCGAGCCTGGTGATAGCTGGTTNTCCAAGAT +AGAATCTTAGTTCAACTTTAAATTTACCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC +CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG +TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC +ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCTTATAGAAGAACTAATGTTAGTATAAG +TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC 
+AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA +AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC +ATTACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA +AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT +TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA +TGGAGCTTTAATTTATTAATGCAAACAATACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT +AAAAATTTCGGTTGGGGCGACCTCGGAGCACAGCCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG +TCAAAGCGAACTACCATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA +GCGCAATCCTATTCCAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG +ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG +AGTAATCCAGGTCGGTTTCTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTA +CTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTNNNNNNNNNNNNNNCCAAGAA +CAGGNTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAATCAGAGGTTCAACT +CCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGCACCCATTCTAATCGCAATGGCAT +TCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCCC +CTACGGGCTACTACAGCCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCTCCACA +TCNACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACTCCCC +TCCCCATACCCAACCCCCTGGTCAACCTCAANCTAGGCCTCCTATTTATCCTAGCCACCTCTAGCCTAGC +CGTTTACTCAATCCTCTGATCAGGGTGAGCGTCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCA +GTGGCCCAAACAATCTCATATGAAGTCACCCTAGCTATCATCCTACTATCAACATTACTAATAAGTGGCT +CCTTTAACCTCTCCACCCTTATCACAACACAAGAGCACCTCTGATTACTCCTGCCATCATGACCCTTGGC +CATAATATGATTTATCTCCACACTAGCAGAGACCAACCGNANCCCCTTCGACCTTGCNGAAGGGGAGTCC +GAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGCCCTATTCTTTATAGCCGAATACA +CAAACATCATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATAACGCACTCTCCCC +TGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTNACCTCCCTATTCTTATGAATTCGAACA +GCATACCCCCGATTCCGCTACGACCAACTCATNCACCTNCTATGAAAAAACTTCCTACCACTCACCCTAG +CATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATG 
+TCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGTTTAAATCCCCTTATTTCTAGGACTATGAG +AATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGCTACCTATCACACCCCATCCTAAAGTAAGGTCA +GCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATATCCTTCCCGTACTAATTAATCCCCTG +GCCCAACCCGNCATNNACTCTACCATCTTTACAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTTT +TTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTCG +TTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATC +CTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAACACTANNNATNNNNNNNNGTCATTAATAA +TCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGGTTACCCAAGG +CACCCCTCTGACATCCGGCCTGCTACTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCAA +ATTTCCCCCTCATTAAACGTAAGCCTTCTCCTCACTCTTTCAATCTTATCCATCATGGCAGGCAGTTGAG +GTGGACTAAACCAAACCCAACTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAAT +AGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATNCTAACTACTACC +GCATTCCTACTACTCAACTTAAACTCCAGCACCACAACCCTACTACTATCTCGCACCTGAAACAAACTAA +CATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTT +GCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCACC +ATCACCCTCCTTAACCTCTACTTCTACCTGCGCCTAATCTACTCCACCTCAATCACACTACTCCCTATAT +CTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATCGC +CCTTACCACACTGCTCCTACCTATCTCCCCTTTTATACTNNNNNNNNNNNAGAAATTTAGGTTAAATACA +GACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAAGGACTGCAAAAT +CCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTAGACCAATGGGAC +TTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCG +CCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAA +AATCACCTCAGAGCTGGTAAAAAGAGGCTTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCAG +CCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGGA +ACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCG +AACTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATT +TGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAATA 
+ATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCNCCCTCCCTCC +TACTCCTGCTTGCATCTGCTATAGTGGAGGCCGGCGCAGGGACAGGTTGAACAGTCTACCCTCCCTTAGC +AGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTGCACCTAGCAGNTATTTCC +TCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAACCCCCTGCCATGACCCAATACC +AAACGCCCCTTTTCGTCTGATCCGTCCTAATCACAGCAGTCTTGCTTCTCCTATCTCTCCCAGTCCTGGC +CGCTGGCATCACTATACTACTAACAGACCGNAACCTCAACACCACCTTCTTCGACCCNGCCGGAGGAGGA +GACCCCATTCTATACCAACACCTATTCTGATTCTTCGGTCACCCTGAAGTTTATATTCTCATCCTACCAG +GCTTCGGAATAATCTCTCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGTAT +GGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGA +ATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCACCGGCGTCAAAG +TATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTGCAGTGCTCTGAGCCCTAGG +ATTTATTTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGTA diff --git a/bio/metadmg/getdamage/wrapper.py b/bio/metadmg/getdamage/wrapper.py index 760cfe493a8..9b93732bf0c 100644 --- a/bio/metadmg/getdamage/wrapper.py +++ b/bio/metadmg/getdamage/wrapper.py @@ -13,12 +13,12 @@ ref = snakemake.input.get("ref", "") if ref: - ref = f"--fasta {ref}" + ref = f"-f {ref}" with tempfile.TemporaryDirectory() as tmpdir: shell( - "metaDMG-cpp getdamage --threads {snakemake.threads} {ref} {extra} --outname {tmpdir}/out {snakemake.input.aln} {log}" + "metaDMG-cpp getdamage --threads {snakemake.threads} {ref} {extra} -o {tmpdir}/out {snakemake.input.aln} {log}" ) if snakemake.output.get("dmg"): diff --git a/bio/metadmg/lca/test/Snakefile b/bio/metadmg/lca/test/Snakefile index aa27cf87760..f002c575772 100644 --- a/bio/metadmg/lca/test/Snakefile +++ b/bio/metadmg/lca/test/Snakefile @@ -2,6 +2,9 @@ rule metadmg_lca: input: aln="{sample}.bam", + names="names.dmp.gz", + nodes="nodes.dmp.gz", + acc2taxid="acc2taxid.tsv", output: res="results/lca/{sample}.out.gz", lca="results/lca/{sample}.lca.gz", @@ -10,7 +13,7 @@ rule metadmg_lca: log: "logs/lca/{sample}.log", params: - extra="-simscorelow 0.95 
-simscorehigh 1.0 -minmapq 30 -howmany 30 -lca_rank species", + extra="-simscorelow 0.95 -simscorehigh 1.0 -minmapq 30 -howmany 30 -lca_rank genus -fix_ncbi 0", threads: 4 resources: mem_mb=1024, diff --git a/bio/metadmg/lca/test/a.bam b/bio/metadmg/lca/test/a.bam index a407ae2040d5f118d107ef42cdafa491bf9aa549..01219f016b7f05f3f12a79395951587c31bd700a 100644 GIT binary patch literal 4896 zcmV+*6W{C~iwFb&00000{{{d;LjnLv3w>B^te0gOzofDqYD#hPWezw?3?$F-et$W< z$dUI3h+-jwqysW71qvhysx@6}X&=;CQ^v}^#LPzN=5ppTD|;Ebn2lz`X8%%%C?|;8s_x1X_c4FOmuM7h$UA=thQ|rg&m>b%7-S`*w@7(*P z-IKd^PG7q5`k{^M$Ja00CX|*!jLD(3NOM}K>YEXozS&rPlTvC=-<++!si^wjxln!+ z(i!^ZoT$FJU^SgZ*T1vTOZsMHbxz@YUen9^cT#!b`ZZ(qO~>f|$A0M>oyGe4oZbsF zrn=u*pTDhIHh~|N@zF%v{#Gj|y%L-ev=$vB*~;|x>liwiATz@|Ttp5qyfS$g{G+_$ z45N+83}K=-fitYKHZvriO0tt8!5E#-5nRAOHA5KFpF>H7hh_E`8c_$wcR_~9Hg(2Z>G@rdVe>8qpI^#>H9T64&XJ5du!a|!{1lHNxQ`(NuhTKm z7o%Kbwr zK~}daN~_c$gU(@;GcL#}7ROg_mG{Og^^%RW! zwoU|$K%PI8k~ZJPiWt3)gAA7$m`Mb>&=d@^dMX3b3@{YMOAt0_9u0}%$YM@;^!J)! zFh(LV&|ub^DHwgWM5pAQjEq2@Z19;ik6SCH3%d99=)9YP5oniC*4>^F`^+EpK-Nb0 zkEYM3)?*gp3_KpKp$r;?`3HtspMw5xommQY@A196>58^BR~7 zo#yX?g#mkp%s}_>qPB;`5-Yr71!0O@RAj4AJx>Uw&Uc>pVmIDlJ zoXVYn6<8b;KPlRr&SA8Sao{faT0T&`U|g4e+A*+z5+fpmuYn6t++w!G_B7TEDKJe_ zHPsq30b4_A;YHtnV4M%Efvtt0mqQ=ab_zysb^IUi2Ou_UuyFdqks zTTE#YD<5M7ET)b!jKof;wy8`*A>}chI&ELuLrQ$BKn#)EH^CS#4fk}HG(>PlR7GbX zQkTmZ=pRPWT}-Ol%pNfwQ&sSUDZ9QHepj;9Yd!?}AS1 z`%W5Di7I=kmSKb`<`;Zw#ThXM@-ZoJkgDGn%dstAAA?`lF(b5wCp`-Tif>|ER9$N!zI^}gVR}hY$1HY51fNf(M%{U`SVqj!rTg}(N0j=*o zg|D@Xbkfw-QTLf{i?PlaXhuz5s4xxGK9(5h%Q-4BY@^h1fKl`?#$Zz?Im4oFQWe7y z!5hwSsrSuRS6FK-V_;|8XMXQ68SvZU&_MkdcEDIl@EjXf4Xs%|zHf5ZoqP7&v2SR@ z`u@KgZBhE6Woro=`Qm4H-Ze71edNG_F~s=KaqV(}4IBRdV*B>&y>n!A_sHm;kppAf zCwGlZrhlg&27nLZ-$KBIW7iZ|D_p{fr9IXOmhe=GwL&F4U0|I+3BwZw)(VsGabPO&0}DA|X8dn~xVbNrQ0VEh8nK1wk18 z*1QVuWI&iaV+)+bKiK}vaD{i0A6$BWWrr;E!F^ATay-dFrhsH}LM!-KbBSj`1~$%HSmUh>1JmE0Eb%P9z*lFiE%7Y5z^@--cq_EPk$+57 zcqgyG1$WJ=@m5rUJ0@pW=jo&rc>Lm(1x^AAytU){63?;;-1iM%;#n+#*QPCGcze!& 
zt}r#;3M2552NqO#Cx^g^ZdY{1;}$4flR9Psw>-33nK z2Aq9;8^aq(17S-`&-|S*n29&sOVafdUquD*c3H z33zL0b;-|S1Z;StS|>{ouzB@5jyL=_-&guS$qjI9{%0z@6B*#1BU?&5OABytW3^5e z6yVg%^4&>BfJ6T-{fEQ@c=~kdha?%mj(2s5XQ2Q#E*Y=zP9A{8Cq^s069wRJ(@J}i z6adRUTH2EY0J!Y=Pn38b`{CUAMO^g5K0GvYQGwHl4~`itBdo~&<(HLRM@#6wc*&R(Higk?e{DCcQ3|j`1AWk$1noo`_5f@)K|*j? z@#DVuE`2}oQ3Ap`>PBM`KL3y8}KE^vs%ol7p9P_n#om&cx-9+%ygJl)eXmcH-R zd*A!s>o?Kna^SX~ewIwo(d1+B{;9sy-@fLD?rZ$@*TM&1aqCNZVdzHa2M6#RvopKO zi>jKI>pWkrGLqqB@O7Hs6W%!wmK?pZymxy4Ti?jP`|z7ze{lahZ+shFEEnkPba8N@ z?z08D_`(9EcOSfWareQ~FP-0e^;PssjsE_mMgLI~dY^}ZaM43R&nU(er-T)R(9FTi zvL6jc?(t=KGwjKQx?9*!FYYeT2KG-i>=zDO*i!>+&fO^XBJ?uAE;33PqthG{i>}hb zVnY!JH{(SY>b}Gaua|g#r13t}NfK(r8~QR9=m_wZMF}CUFe4?&)2uYdSYh?{!uku1 z^{)pl))Ny}5pc<4^i_DqT3Z!mj)QH=NLpf?r^6E>NPF_QCn5f+L4Wx&A2ULm+(hjA5pv;~!pc_r~*26%jfayEjFyAVJp8mBj!%WpInanN#c zOk4sll7WjJspKLA;L}xQuPmovCGc7eWdP8*I1|CbHAA0Vm&%`M=pP!RIici{D{9Rl z+mch=h;-hvQSn}S{-~m85dTUeeh#(4e$k9LhTK8l2gFdtc0s5Epe;xDsiEp#BbPnJ zJV1d+C*e&BBs|I8P}B-k6`5l(l_?;Wm}b`69t_!Cc;i}96YMuN=)+F1KVbqLb0MK6 zJyF4?`L@cJbbV`ZxSNpoq)Jc7o8*4?V0dzS9MDH`o!l9wt;#T^G7YBrb(l?Z&z0gY zJ+W838^qBV#GoV6-yq(Wkzu9_5C>;a9!A8I>pkkX8u1r9h+i@wmTm~3BlK7yF0n;z zLe?0st0JA|4y7eYhDhJ6&5xcQ)RP}SzGk@4gqg5|Bak6Sw{{t>L812c1N##V_9LAp zV{%(kdNPdN+HEn+Fac+q0%S(4YLMF!;n@I{-jNpn&}e_yNeh#8kur*S;MdkAD=Vw8 z?1IuY#k6F!VjTJx@j&doiEO1Ha)TY?V3C?V4(J4IwP@$_+NZFHF5-U}i&y#pozwwjgP68jsNg~9i zAW|MJ2Y9U=W9h#%i~n>g_){hpekemXL{H$j@)S^)SdG%fR$qb>shcX$)z+#lG&t zd&d2D8b9k4xH0aLA4M=hskmp9P#AV`cajeL${@Lplr;iH0u4Hjj~h9f9V@Aw*UHqYQ(XV1G`RjZP~_O>i7%B)4Myfy zQy)(>!FG-@+mQ&yM$hwVJF}0%o|^x2V+c{J zRVKxa1owRD){1+TgOlg^dQGQOdoRTm(Rp9)^&HKwHI38B@Y0X`FaWz)m;QF&fY%Yi zr96pP_O!en(fXf8{=E+JyGG=3$Rntl_amhC$`!>`Wjob6%L_1tRY5FBGw6KK`|P5s z7%Lfh#ME_iGV)mBogbc@P)a}if|87-$CtwG-Fo$O{AN4(R>#hK#za4MMeGIW9aU4? 
zuD)5UZd}gzG`x1QGD`cllRZcKd(G|mu<`i_ebT@!mOHGSvN z>|(IrX8vy){=JSJ2u<+8YQ|BB9;z{=XjaGNmDMpxD|IZh8OT@ch7xEfA)6<+;Z2;( zpTEuA`F!3E?=q3I_$4=qZUdS4yl;=wBN2}^ofn7OfA0DLm!4bO|0{EEu`aTTIIP5@ zGAyVUJA$8y{RZNO!z&#Q2DRpHAfiCXTCI1_jf{;TA= zw>CP4?m6Jvf7jD&GXL9AtB;$ohtgMjs-LSbtLnLHYacjfky(2*P~5%r=q-)@4b)6m zMm^#H-r-y^PJm;S36`L*(bxR7%v7s+Any=O?YFEReA8%qW#5o_==9|oE zVowi-XT9eM&v!TW-rC;0)e1B+>v=t&^rU{zeDZ%`gwx&49{>O!iwFb&00000{{{d; SLjnLB00RI3000000000FdWmfS literal 521 zcmb2|=3rp}f&Xj_PR>jW$qdCsUsAWECnOYb@IB%Awpq)|_wkpHn~e;gI&nxey7KIh zX}V)+8@r}RyIZ)JyC}%Ax%Od>p4tLI9S5F}a|^haL@MSev+=UAu`y&El!{FO8ZD1z zM+al?r2T$}4Fry@*L4hOcp=Knws6J72`6sU8>lZ7+90a6z<@ExTx->)17gOhZ>&E! ztvO@EG3Ro2s^!`!@A)!w-`@W|cdla3ajlbu`ltD74w_x>^EkdFacflb3_GVF2d&Ag zmbiF`D23F0m@a!a?@HuC--y%A4U94K?Z4RHoA;}Ey4;>khoyFleYLyvM5ibAb&0?M zW!p(*m6ucaDtHY;7O8HW(wPwDHZ67D>}@*I1&7NH9oJcUc#7__7i{L6rlfiOc)B2W zlE}XIuAdLJ_Snv8({41@j_EI46?)R)Tu=Jt2{KD{i+sy}Isd-AT;^qWx03$6nNPdI zifh(PNK&X}Gd^9-%e1Py+eK4nu}k@}4exc2&vGcw$&kGjlCRq7@+IKpjo6mjv?Ygc zFr8&L+cwc`&H5kF|2Ol5KMJmVcPDYzoz31CIriu8Hc52(s%hD?VUx(8zK5^bVs3p9 w?pT?(;cx1N?LGHQE>xTS5@8E5Ke1Hx#Q#74zn?J!Ml*UuNHZ{l;~qo+0Fsg3fB*mh diff --git a/bio/metadmg/lca/test/acc2taxid.tsv b/bio/metadmg/lca/test/acc2taxid.tsv new file mode 100644 index 00000000000..7211a71a306 --- /dev/null +++ b/bio/metadmg/lca/test/acc2taxid.tsv @@ -0,0 +1 @@ +NC_023100.1 1425170 diff --git a/bio/metadmg/lca/test/names.dmp.gz b/bio/metadmg/lca/test/names.dmp.gz new file mode 100644 index 0000000000000000000000000000000000000000..bb43fd5167bfd17231726f898946e77e38e7c2ae GIT binary patch literal 969 zcmV;)12+60iwFo-?+axD18!k$WpgfMZEygUS6gq>Fcf|szamdOftE{?G}1I_4A>r+ zn6!!a)6_JkjvX9lsE8lWv2$zM*x972vU|RBzq!H1zYAatzwuKaxZsEK;@`UfZ&C== z{hm|875zzbk3&h29;R`IgfC4F@&KpY6RJ=^ECA90NZkTei!q#$V6iTP+Fo&NqU%m8a zQeRt~HUf@t)7ygMOU!R{K24B##{nIzl_*<(CihplWW@Y}KV<7oZL_LghwBuHPatzq zDIyO$N^NzLMXS`5sVa!1z`YX65?CjHD!0Ra>97kT9~tCG0Y=Z7wL?@r;uT5e-R^r)p)hT6>@^1Gogz);(Xj1JYgJ2m-O3)b8>$~lmx#+FIr 
z^?A#j;H@c%LtRC;Xt|UH<@Rz&{FM%YB%nN~+;!H0??s6cw?Mo*2?yQWVLW5$33$PR zN5OYTurbhyAGcz}a&ShhLz{*aO$mIq{Gu=pb%dXZl2D1Qwd#AI>VVG3X|VS4rsUMI zi+UIs7+DmlU3oUW={F93NOgkbEeI3S|inA#fYuXEeU;HB{v+K zWo9b%bTIU}?(7lWistTwbl?Z~J|AQ@Owk-O)i99W#D+7ufiAR9lwr#|=5Q3nW|ePo zxD#o14zWmVfuBd9UFynDQXT0BQy3K%lXxN-IP#wAr*3fI$^T33QbXfxycA}eOb2KQ%_jykp3P;Etx>=Xj zRS^34N?i|=nA<1uF;ZMU0>8Y&)@01q=)o#jCB`j$cof)yV#lboFC}CMO_FZ7Dx|0c z-4d9Q+Jc^FN$F1^%LRdYnBn4TzrCw|*PcJPkJ0W%?tvy^$5x(OkS3XF&U>hsaIe}R ziq=u0|1f-}6&L;9z*x(wuxdbC;=okrJwZ#@hR7X0pSZK&MjpL3i-D2wn%3SECW|OM rL%zo~k@+*L)GfkpY?S(e=K3Tg5oF=vcmkh*hl2PIQ)3|U2$UI`D%<8+W2&z&^I^Zj;1NM>sco5N z>uq_ej(Jl2e6Cx3muKnf`r(WMSn1dl`>Z%y@r)KmwZ_#;j8xi~=glrT<;Tms52t)i zLnwu|@GG*(aJ@w<5w+o}tbQ+ECu+rHiYlpp%R0^KcM@6=aiq}}d9kaL7nG9`dy|%u zx*d5Zp|hBvDDBh-E?79*PJ)sc5d4+|PO&uKO{Gb){l_s$vw4{p+Ua!=2_@8PFx+5I zNr+f3ZtW>+adgw%=w&PG9CoDpBnZ!=cnpbRiu7{94U9$1%Xvp3vSRedB3O{8j>mK1 zRc!$;MhZ5!=a@NKJ|jmlOYtALrDCo0*lxmna*r`%x0ajg`*J*=W(+WQxSWSbwd0*^ z@TjK;)k%m14yQKZ3eTydB4Du?c93yE{2Ydd&MifR%3icXUhLB{SOpmI%&bCZ5$5H2 zA4N{6f+!9`!!n*IUsSM^BM2mk=3 C%i$6L literal 0 HcmV?d00001 diff --git a/bio/metadmg/lca/wrapper.py b/bio/metadmg/lca/wrapper.py index 48b14aa5179..0d9c9cd9ba5 100644 --- a/bio/metadmg/lca/wrapper.py +++ b/bio/metadmg/lca/wrapper.py @@ -13,29 +13,29 @@ with tempfile.TemporaryDirectory() as tmpdir: shell( - "metaDMG-cpp lca -nthreads {snakemake.threads} -bam {snakemake.input.aln} -names {snakemake.input.names} -nodes {snakemake.input.nodes} -acc2tax {snakemake.input.acc2taxid} -tempfolder {tmpdir} {extra} --outnames {tmpdir}/out {log}" + "metaDMG-cpp lca -nthreads {snakemake.threads} -bam {snakemake.input.aln} -names {snakemake.input.names} -nodes {snakemake.input.nodes} -acc2tax {snakemake.input.acc2taxid} -tempfolder {tmpdir} {extra} -outnames {tmpdir}/out {log}" ) if snakemake.output.get("dmg"): assert snakemake.output.dmg.endswith( ".gz" - ), "'dmg' file is Gzip compressed, but extension is not '.gz'" + ), "'dmg' file is Gzip compressed, but has no '.gz' extension" 
shell("cat {tmpdir}/out.bdamage.gz > {snakemake.output.dmg}") if snakemake.output.get("lca"): assert snakemake.output.lca.endswith( ".gz" - ), "'lca' file is Gzip compressed, but extension is not '.gz'" + ), "'lca' file is Gzip compressed, but has no '.gz' extension" shell("cat {tmpdir}/out.lca.gz > {snakemake.output.lca}") if snakemake.output.get("stats"): assert not snakemake.output.stats.endswith( ".gz" - ), "'stats' file is NOT Gzip compressed, but extension is '.gz'" + ), "'stats' file is NOT Gzip compressed, but has '.gz' extension" shell("cat {tmpdir}/out.stat > {snakemake.output.stats}") if snakemake.output.get("log"): assert not snakemake.output.log.endswith( ".gz" - ), "'log' file is NOT Gzip compressed, but extension is '.gz'" + ), "'log' file is NOT Gzip compressed, but has '.gz' extension" shell("cat {tmpdir}/out.log > {snakemake.output.log}") From 4672531a09c61c29502bc5068a1fc6adafa5d50a Mon Sep 17 00:00:00 2001 From: fgvieira <1151762+fgvieira@users.noreply.github.com> Date: Tue, 7 Nov 2023 12:13:16 +0100 Subject: [PATCH 4/4] Add dfit wrapper and extra tests --- bio/metadmg/compressbam/environment.yaml | 2 +- bio/metadmg/compressbam/test/Snakefile | 1 - bio/metadmg/compressbam/test/genome.fasta | 101 ---------------------- bio/metadmg/compressbam/wrapper.py | 7 +- bio/metadmg/dfit/environment.yaml | 6 ++ bio/metadmg/dfit/meta.yaml | 19 ++++ bio/metadmg/dfit/test/Snakefile | 20 +++++ bio/metadmg/dfit/test/names.dmp.gz | Bin 0 -> 946 bytes bio/metadmg/dfit/test/nodes.dmp.gz | Bin 0 -> 460 bytes bio/metadmg/dfit/wrapper.py | 36 ++++++++ bio/metadmg/getdamage/environment.yaml | 2 +- bio/metadmg/getdamage/test/Snakefile | 5 +- bio/metadmg/getdamage/test/genome.fasta | 101 ---------------------- bio/metadmg/getdamage/wrapper.py | 26 ++---- bio/metadmg/lca/environment.yaml | 2 +- bio/metadmg/lca/meta.yaml | 1 - bio/metadmg/lca/test/Snakefile | 5 +- bio/metadmg/lca/test/acc2taxid.tsv | 3 +- bio/metadmg/lca/test/names.dmp.gz | Bin 969 -> 946 bytes 
bio/metadmg/lca/wrapper.py | 30 ++----- test.py | 22 ++++- 21 files changed, 124 insertions(+), 265 deletions(-) delete mode 100644 bio/metadmg/compressbam/test/genome.fasta create mode 100644 bio/metadmg/dfit/environment.yaml create mode 100644 bio/metadmg/dfit/meta.yaml create mode 100644 bio/metadmg/dfit/test/Snakefile create mode 100644 bio/metadmg/dfit/test/names.dmp.gz create mode 100644 bio/metadmg/dfit/test/nodes.dmp.gz create mode 100644 bio/metadmg/dfit/wrapper.py delete mode 100644 bio/metadmg/getdamage/test/genome.fasta diff --git a/bio/metadmg/compressbam/environment.yaml b/bio/metadmg/compressbam/environment.yaml index 1d9b3558d4d..7370c2427b7 100644 --- a/bio/metadmg/compressbam/environment.yaml +++ b/bio/metadmg/compressbam/environment.yaml @@ -3,4 +3,4 @@ channels: - bioconda - nodefaults dependencies: - - metadmg =0.3 + - metadmg =0.4 diff --git a/bio/metadmg/compressbam/test/Snakefile b/bio/metadmg/compressbam/test/Snakefile index 5e7ba6e0a59..08e1095ac4b 100644 --- a/bio/metadmg/compressbam/test/Snakefile +++ b/bio/metadmg/compressbam/test/Snakefile @@ -2,7 +2,6 @@ rule compressbam: input: aln="{sample}.bam", - ref="genome.fasta", output: "results/compressbam/{sample}.bam", log: diff --git a/bio/metadmg/compressbam/test/genome.fasta b/bio/metadmg/compressbam/test/genome.fasta deleted file mode 100644 index 157f91330cd..00000000000 --- a/bio/metadmg/compressbam/test/genome.fasta +++ /dev/null @@ -1,101 +0,0 @@ ->NC_023100.1 partial -GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTTTGGGGG -GTGTGCACGCGATAGCATTGCGAAACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC -CTGCCCCATCTCATTATTTATCGCACCTACGTTCAATATTACAGGCGAGCATANNTACTAAAGTGTGTTA -ATTAATTAATGCTTGTAGGACATAATAATAACNATTNAATGNCTGCACAGCCGCTTTCCACACAGACATC -ACAACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA -AACCCCAAAAACAAAGAACCCTAACACCAGCCTAGCCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC -TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATTAATA 
-CAACCCCCGCCCATNCTACCCANNACACACACACCGCTGCTAACTCCATACCCCGAACCAACCAAACCCC -AAAGACACCCCCCANAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC -ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA -GCATCCCCATTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC -AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATAAGCCTTTAGCAATAA -ACGAAAGTTTAACTAAGCTATACTAACTCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA -TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCANNNNNNNNNNAATAAAGCTAAAACT -CACCTGAGTTGTAAAAAACTCCAGTTGATACAAAATAAACTACGAAAGTGGCTTTAACACATCTGAATAC -ACAATAGCTAAGGCCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC -AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC -CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCCCTTGCTCAGCCTATATA -CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC -AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAACCCTTAT -GAAATTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAATAGAGTGCTTAGTTGAACAGGGCCCTGA -AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA -TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA -GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA -GCCCCAAACCCACTCCACCTTACTACCANACAACCTTAACCAAACCATTTACCCAAATAAAGTATAGGCG -ATAGAAATTGTAACCCGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA -ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAACC -AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA -AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAANCTANCGAGCCTGGTGATAGCTGGTTNTCCAAGAT -AGAATCTTAGTTCAACTTTAAATTTACCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC -CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG -TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC -ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCTTATAGAAGAACTAATGTTAGTATAAG -TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC 
-AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA -AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC -ATTACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA -AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT -TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA -TGGAGCTTTAATTTATTAATGCAAACAATACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT -AAAAATTTCGGTTGGGGCGACCTCGGAGCACAGCCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG -TCAAAGCGAACTACCATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA -GCGCAATCCTATTCCAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG -ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG -AGTAATCCAGGTCGGTTTCTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTA -CTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTNNNNNNNNNNNNNNCCAAGAA -CAGGNTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAATCAGAGGTTCAACT -CCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGCACCCATTCTAATCGCAATGGCAT -TCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCCC -CTACGGGCTACTACAGCCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCTCCACA -TCNACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACTCCCC -TCCCCATACCCAACCCCCTGGTCAACCTCAANCTAGGCCTCCTATTTATCCTAGCCACCTCTAGCCTAGC -CGTTTACTCAATCCTCTGATCAGGGTGAGCGTCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCA -GTGGCCCAAACAATCTCATATGAAGTCACCCTAGCTATCATCCTACTATCAACATTACTAATAAGTGGCT -CCTTTAACCTCTCCACCCTTATCACAACACAAGAGCACCTCTGATTACTCCTGCCATCATGACCCTTGGC -CATAATATGATTTATCTCCACACTAGCAGAGACCAACCGNANCCCCTTCGACCTTGCNGAAGGGGAGTCC -GAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGCCCTATTCTTTATAGCCGAATACA -CAAACATCATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATAACGCACTCTCCCC -TGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTNACCTCCCTATTCTTATGAATTCGAACA -GCATACCCCCGATTCCGCTACGACCAACTCATNCACCTNCTATGAAAAAACTTCCTACCACTCACCCTAG -CATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATG 
-TCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGTTTAAATCCCCTTATTTCTAGGACTATGAG -AATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGCTACCTATCACACCCCATCCTAAAGTAAGGTCA -GCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATATCCTTCCCGTACTAATTAATCCCCTG -GCCCAACCCGNCATNNACTCTACCATCTTTACAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTTT -TTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTCG -TTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATC -CTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAACACTANNNATNNNNNNNNGTCATTAATAA -TCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGGTTACCCAAGG -CACCCCTCTGACATCCGGCCTGCTACTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCAA -ATTTCCCCCTCATTAAACGTAAGCCTTCTCCTCACTCTTTCAATCTTATCCATCATGGCAGGCAGTTGAG -GTGGACTAAACCAAACCCAACTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAAT -AGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATNCTAACTACTACC -GCATTCCTACTACTCAACTTAAACTCCAGCACCACAACCCTACTACTATCTCGCACCTGAAACAAACTAA -CATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTT -GCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCACC -ATCACCCTCCTTAACCTCTACTTCTACCTGCGCCTAATCTACTCCACCTCAATCACACTACTCCCTATAT -CTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATCGC -CCTTACCACACTGCTCCTACCTATCTCCCCTTTTATACTNNNNNNNNNNNAGAAATTTAGGTTAAATACA -GACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAAGGACTGCAAAAT -CCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTAGACCAATGGGAC -TTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCG -CCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAA -AATCACCTCAGAGCTGGTAAAAAGAGGCTTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCAG -CCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGGA -ACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCG -AACTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATT -TGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAATA 
-ATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCNCCCTCCCTCC -TACTCCTGCTTGCATCTGCTATAGTGGAGGCCGGCGCAGGGACAGGTTGAACAGTCTACCCTCCCTTAGC -AGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTGCACCTAGCAGNTATTTCC -TCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAACCCCCTGCCATGACCCAATACC -AAACGCCCCTTTTCGTCTGATCCGTCCTAATCACAGCAGTCTTGCTTCTCCTATCTCTCCCAGTCCTGGC -CGCTGGCATCACTATACTACTAACAGACCGNAACCTCAACACCACCTTCTTCGACCCNGCCGGAGGAGGA -GACCCCATTCTATACCAACACCTATTCTGATTCTTCGGTCACCCTGAAGTTTATATTCTCATCCTACCAG -GCTTCGGAATAATCTCTCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGTAT -GGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGA -ATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCACCGGCGTCAAAG -TATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTGCAGTGCTCTGAGCCCTAGG -ATTTATTTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGTA diff --git a/bio/metadmg/compressbam/wrapper.py b/bio/metadmg/compressbam/wrapper.py index b35329493bd..205649a4eb8 100644 --- a/bio/metadmg/compressbam/wrapper.py +++ b/bio/metadmg/compressbam/wrapper.py @@ -14,12 +14,9 @@ ref = snakemake.input.get("ref", "") if ref: - ref = f"-ref {ref}" - - -out_fmt = Path(snakemake.input.aln).suffix.lstrip(".").lower() + ref = f"--ref {ref}" shell( - "compressbam -@ {snakemake.threads} -hts {snakemake.input.aln} {ref} {extra} -type {out_fmt} -out {snakemake.output[0]} {log}" + "compressbam --threads {snakemake.threads} --input {snakemake.input.aln} {ref} {extra} --output {snakemake.output[0]} {log}" ) diff --git a/bio/metadmg/dfit/environment.yaml b/bio/metadmg/dfit/environment.yaml new file mode 100644 index 00000000000..7370c2427b7 --- /dev/null +++ b/bio/metadmg/dfit/environment.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - metadmg =0.4 diff --git a/bio/metadmg/dfit/meta.yaml b/bio/metadmg/dfit/meta.yaml new file mode 100644 index 00000000000..e095b157d4b --- /dev/null +++ b/bio/metadmg/dfit/meta.yaml @@ -0,0 +1,19 @@ +name: 
metaDMG dfit +url: https://github.com/metaDMG-dev/metaDMG-cpp +description: metaDMG-cpp is a fast and efficient method for estimating mutation and damage rates in ancient DNA data +authors: + - Filipe G. Vieira +input: + - aln: SAM/BAM/CRAM file + - names: taxonomy file "names.dmp" + - nodes: taxonomy file "nodes.dmp" + - acc2taxid: TSV with correspondence between accessions and taxa IDs +output: + - dmg: path to TSV file containing counts of mismatches conditional on strand and cycle. + - lca: path to TSV file with LCA results. + - stat: path to TSV file with general stats. +params: + - extra: additional program arguments. +notes: | + * Input BAM file has to be sorted by read name. + * More information about output formats in https://github.com/metaDMG-dev/metaDMG-cpp/blob/master/doc/formats.pdf diff --git a/bio/metadmg/dfit/test/Snakefile b/bio/metadmg/dfit/test/Snakefile new file mode 100644 index 00000000000..e9fd2e58740 --- /dev/null +++ b/bio/metadmg/dfit/test/Snakefile @@ -0,0 +1,20 @@ + +rule metadmg_dfit: + input: + dmg="{sample}.bam", + names="names.dmp.gz", + nodes="nodes.dmp.gz", + lca_stats="{sample}.stat", + output: + dfit="results/dfit/{sample}.out.gz", + stats_dfit="stats/dfit/{sample}.dfit.tsv.gz", + stats_boot="stats/dfit/{sample}.boot.tsv.gz", + log: + "logs/dfit/{sample}.log", + params: + extra="--nopt 10 --doboot 1 --nbootstrap 20 --showfits 2 --lib ds", + threads: 1 + resources: + mem_mb=1024, + wrapper: + "master/bio/metadmg/dfit" diff --git a/bio/metadmg/dfit/test/names.dmp.gz b/bio/metadmg/dfit/test/names.dmp.gz new file mode 100644 index 0000000000000000000000000000000000000000..44fda1e8ca21df05d162e4a4985d985c88359c2e GIT binary patch literal 946 zcmV;j15NxNiwFQyIz?pw1C>`xZ`(K!KBvDzr#*EM>um!;fHdtUhXxxU#oos@Yq6$C zm7-F(=#SqaMM;(=QVC!fr01LWiw>{;TmfVFji36&1wWNne?EkGlR_x_`;HQ>=y$pc zIFtnGQJ!v)@TJ*-Ji#URh$<8i2Y^@rsawE-@GbCTuoGn|_(a#ytLy03)dLn>A{J+C z8ZN=jmO-ry_!cR{?G3DF{%GE@d)X?FjD00h@LM?T%kxlY<-QPd4{kRsY}dk!DgD{&
zgr9LHHMh=;<$0oA`j1FOwi1mE?weOiN?0$JV2ZH=|3e@5<*U zNn-7lq~*|dgGVj3GE_Tz$j^qp0z+*VGAzovJ2v@T3)b9M$~lmx#+J#`^|WO!@XnN^ zk*=b9v|P%9a(6jot3-!D5>NqD{yMYZM^U20J&@o*!bvxGl+IXs0ba1+S@6RdYzTC= zN?S2vIXDB>qfJAKrUX7)esPpWI>O(Gl2D1gv+6sbvOsU-G+29iS90pvMI8(b%%2R6 z)vkOqy%{`%JQ5lF6+A-yi!i3svdO108nX@jH?}3`r&1h;sW$AN2D5bi)NZWcAVpJM zCai=0ThSWT9tGEX0I)G|6a4*BQdI=uW<6)x45u?v7App9J+&lS=_quq>6Jo(n{(audY?z`u%v8fb1|u6LazkBcpD4qY_sroqPR%Oc<8UX^>>T2d z*aANfK)2LnmE}6pPo_SWal>f0C-2DiaJ_3t2X(v2!b@7UbE+H0L8!O!!xA7{}{4(|;H~ z(~663Z(yutRX8=EEoo?~^O2w>Y)j+;pHJLda08ECn?=t^cuk$r43jutg_`S=kVKG$ USKkGE1n#r>Kdr(7&}R++0D}$8qW}N^ literal 0 HcmV?d00001 diff --git a/bio/metadmg/dfit/test/nodes.dmp.gz b/bio/metadmg/dfit/test/nodes.dmp.gz new file mode 100644 index 0000000000000000000000000000000000000000..90eaef09c244b9dc1d10d4e912995c28a3760666 GIT binary patch literal 460 zcmV;-0WU2$UI`D%<8+W2&z&^I^Zj;1NM>sco5N z>uq_ej(Jl2e6Cx3muKnf`r(WMSn1dl`>Z%y@r)KmwZ_#;j8xi~=glrT<;Tms52t)i zLnwu|@GG*(aJ@w<5w+o}tbQ+ECu+rHiYlpp%R0^KcM@6=aiq}}d9kaL7nG9`dy|%u zx*d5Zp|hBvDDBh-E?79*PJ)sc5d4+|PO&uKO{Gb){l_s$vw4{p+Ua!=2_@8PFx+5I zNr+f3ZtW>+adgw%=w&PG9CoDpBnZ!=cnpbRiu7{94U9$1%Xvp3vSRedB3O{8j>mK1 zRc!$;MhZ5!=a@NKJ|jmlOYtALrDCo0*lxmna*r`%x0ajg`*J*=W(+WQxSWSbwd0*^ z@TjK;)k%m14yQKZ3eTydB4Du?c93yE{2Ydd&MifR%3icXUhLB{SOpmI%&bCZ5$5H2 zA4N{6f+!9`!!n*IUsSM^BM2mk=3 C%i$6L literal 0 HcmV?d00001 diff --git a/bio/metadmg/dfit/wrapper.py b/bio/metadmg/dfit/wrapper.py new file mode 100644 index 00000000000..482bea487c7 --- /dev/null +++ b/bio/metadmg/dfit/wrapper.py @@ -0,0 +1,36 @@ +__author__ = "Filipe G. Vieira" +__copyright__ = "Copyright 2023, Filipe G. 
Vieira" +__license__ = "MIT" + + +import tempfile +from snakemake.shell import shell + + +extra = snakemake.params.get("extra", "") +log = snakemake.log_fmt_shell(stdout=True, stderr=True) + + +names = snakemake.input.get("names", "") +if names: + names = f"--names {names}" + +nodes = snakemake.input.get("nodes", "") +if nodes: + nodes = f"--nodes {nodes}" + +lca_stats = snakemake.input.get("lca_stats", "") +if lca_stats: + lca_stats = f"--lcastat {lca_stats}" + + +with tempfile.TemporaryDirectory() as tmpdir: + shell( + "metaDMG-cpp dfit {snakemake.input.dmg} {names} {nodes} {lca_stats} {extra} --out_prefix {tmpdir}/out {log}" + ) + + for output in snakemake.output: + for ext in [".dfit.txt.gz", ".dfit.stat.txt.gz", ".boot.stat.txt.gz"]: + if output.endswith(ext): + shell("cat {tmpdir}/out{ext} > {output}") + continue diff --git a/bio/metadmg/getdamage/environment.yaml b/bio/metadmg/getdamage/environment.yaml index 1d9b3558d4d..7370c2427b7 100644 --- a/bio/metadmg/getdamage/environment.yaml +++ b/bio/metadmg/getdamage/environment.yaml @@ -3,4 +3,4 @@ channels: - bioconda - nodefaults dependencies: - - metadmg =0.3 + - metadmg =0.4 diff --git a/bio/metadmg/getdamage/test/Snakefile b/bio/metadmg/getdamage/test/Snakefile index 51e7751cd3d..085c68adc08 100644 --- a/bio/metadmg/getdamage/test/Snakefile +++ b/bio/metadmg/getdamage/test/Snakefile @@ -2,15 +2,14 @@ rule metadmg_getdamage: input: aln="{sample}.bam", - ref="genome.fasta", output: res="results/getdamage/{sample}.out.gz", dmg="results/getdamage/{sample}.dmg.gz", - stats="results/getdamage/{sample}.tsv", + stats="stats/getdamage/{sample}.tsv", log: "logs/getdamage/{sample}.log", params: - extra="-l 30 -p 30 -r 1", + extra="--min_length 30 --print_length 30 --run_mode 1", threads: 4 resources: mem_mb=1024, diff --git a/bio/metadmg/getdamage/test/genome.fasta b/bio/metadmg/getdamage/test/genome.fasta deleted file mode 100644 index 157f91330cd..00000000000 --- a/bio/metadmg/getdamage/test/genome.fasta +++ 
/dev/null @@ -1,101 +0,0 @@ ->NC_023100.1 partial -GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTTTGGGGG -GTGTGCACGCGATAGCATTGCGAAACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC -CTGCCCCATCTCATTATTTATCGCACCTACGTTCAATATTACAGGCGAGCATANNTACTAAAGTGTGTTA -ATTAATTAATGCTTGTAGGACATAATAATAACNATTNAATGNCTGCACAGCCGCTTTCCACACAGACATC -ACAACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA -AACCCCAAAAACAAAGAACCCTAACACCAGCCTAGCCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC -TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATTAATA -CAACCCCCGCCCATNCTACCCANNACACACACACCGCTGCTAACTCCATACCCCGAACCAACCAAACCCC -AAAGACACCCCCCANAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC -ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA -GCATCCCCATTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC -AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATAAGCCTTTAGCAATAA -ACGAAAGTTTAACTAAGCTATACTAACTCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA -TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCANNNNNNNNNNAATAAAGCTAAAACT -CACCTGAGTTGTAAAAAACTCCAGTTGATACAAAATAAACTACGAAAGTGGCTTTAACACATCTGAATAC -ACAATAGCTAAGGCCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC -AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC -CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCCCTTGCTCAGCCTATATA -CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC -AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAACCCTTAT -GAAATTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAATAGAGTGCTTAGTTGAACAGGGCCCTGA -AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA -TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA -GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA -GCCCCAAACCCACTCCACCTTACTACCANACAACCTTAACCAAACCATTTACCCAAATAAAGTATAGGCG -ATAGAAATTGTAACCCGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA -ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAACC 
-AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA -AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAANCTANCGAGCCTGGTGATAGCTGGTTNTCCAAGAT -AGAATCTTAGTTCAACTTTAAATTTACCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC -CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG -TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC -ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCTTATAGAAGAACTAATGTTAGTATAAG -TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC -AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA -AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC -ATTACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA -AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT -TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA -TGGAGCTTTAATTTATTAATGCAAACAATACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT -AAAAATTTCGGTTGGGGCGACCTCGGAGCACAGCCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG -TCAAAGCGAACTACCATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA -GCGCAATCCTATTCCAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG -ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG -AGTAATCCAGGTCGGTTTCTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCTA -CTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTNNNNNNNNNNNNNNCCAAGAA -CAGGNTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAATCAGAGGTTCAACT -CCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGCACCCATTCTAATCGCAATGGCAT -TCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCCC -CTACGGGCTACTACAGCCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCTCCACA -TCNACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACTCCCC -TCCCCATACCCAACCCCCTGGTCAACCTCAANCTAGGCCTCCTATTTATCCTAGCCACCTCTAGCCTAGC -CGTTTACTCAATCCTCTGATCAGGGTGAGCGTCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGCA -GTGGCCCAAACAATCTCATATGAAGTCACCCTAGCTATCATCCTACTATCAACATTACTAATAAGTGGCT 
-CCTTTAACCTCTCCACCCTTATCACAACACAAGAGCACCTCTGATTACTCCTGCCATCATGACCCTTGGC -CATAATATGATTTATCTCCACACTAGCAGAGACCAACCGNANCCCCTTCGACCTTGCNGAAGGGGAGTCC -GAACTAGTCTCAGGCTTCAACATCGAATACGCCGCAGGCCCCTTCGCCCTATTCTTTATAGCCGAATACA -CAAACATCATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATAACGCACTCTCCCC -TGAACTCTACACAACATATTTTGTCACCAAGACCCTACTTCTNACCTCCCTATTCTTATGAATTCGAACA -GCATACCCCCGATTCCGCTACGACCAACTCATNCACCTNCTATGAAAAAACTTCCTACCACTCACCCTAG -CATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTCCCCCTCAAACCTAAGAAATATG -TCTGATAAAAGAGTTACTTTGATAGAGTAAATAATAGGAGTTTAAATCCCCTTATTTCTAGGACTATGAG -AATCGAACCCATCCCTGAGAATCCAAAATTCTCCGTGCTACCTATCACACCCCATCCTAAAGTAAGGTCA -GCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTGGTTATATCCTTCCCGTACTAATTAATCCCCTG -GCCCAACCCGNCATNNACTCTACCATCTTTACAGGCACACTCATCACAGCGCTAAGCTCGCACTGATTTT -TTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTCTAACCAAAAAAATAAACCCTCG -TTCCACAGAAGCTGCCATCAAGTATTTCCTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATC -CTCTTCAACAATATACTCTCCGGACAATGAACCATAACCAACACTANNNATNNNNNNNNGTCATTAATAA -TCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCCTTTCACTTCTGAGTCCCAGAGGTTACCCAAGG -CACCCCTCTGACATCCGGCCTGCTACTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACCAA -ATTTCCCCCTCATTAAACGTAAGCCTTCTCCTCACTCTTTCAATCTTATCCATCATGGCAGGCAGTTGAG -GTGGACTAAACCAAACCCAACTACGCAAAATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAAT -AGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATNCTAACTACTACC -GCATTCCTACTACTCAACTTAAACTCCAGCACCACAACCCTACTACTATCTCGCACCTGAAACAAACTAA -CATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTT -GCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCATCATCCCCACCATCATAGCCACC -ATCACCCTCCTTAACCTCTACTTCTACCTGCGCCTAATCTACTCCACCTCAATCACACTACTCCCTATAT -CTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCCCACACTCATCGC -CCTTACCACACTGCTCCTACCTATCTCCCCTTTTATACTNNNNNNNNNNNAGAAATTTAGGTTAAATACA -GACCAAGAGCCTTCAAAGCCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAAGGACTGCAAAAT -CCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAAGCCCTTACTAGACCAATGGGAC 
-TTAAACCCACAAACACTTAGTTAACAGCTAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCG -CCGGGAAAAAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCAATTCAATATGAA -AATCACCTCAGAGCTGGTAAAAAGAGGCTTAACCCCTGTCTTTAGATTTACAGTCCAATGCTTCACTCAG -CCATTTTACCTCACCCCCACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTGGA -ACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCTCTAAGCCTCCTTATTCGAGCCG -AACTGGGCCAGCCAGGCAACCTTCTAGGTAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATT -TGTAATAATCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTAGTTCCCCTAATA -ATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAACATAAGCTTCTGACTCTTACCNCCCTCCCTCC -TACTCCTGCTTGCATCTGCTATAGTGGAGGCCGGCGCAGGGACAGGTTGAACAGTCTACCCTCCCTTAGC -AGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTCCTTGCACCTAGCAGNTATTTCC -TCTATCTTAGGGGCCATCAATTTCATCACAACAATTATCAATATAAAACCCCCTGCCATGACCCAATACC -AAACGCCCCTTTTCGTCTGATCCGTCCTAATCACAGCAGTCTTGCTTCTCCTATCTCTCCCAGTCCTGGC -CGCTGGCATCACTATACTACTAACAGACCGNAACCTCAACACCACCTTCTTCGACCCNGCCGGAGGAGGA -GACCCCATTCTATACCAACACCTATTCTGATTCTTCGGTCACCCTGAAGTTTATATTCTCATCCTACCAG -GCTTCGGAATAATCTCTCATATTGTAACTTACTACTCCGGAAAAAAAGAACCATTTGGATACATAGGTAT -GGTCTGAGCTATGATATCAATTGGCTTCCTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGA -ATAGACGTAGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCACCGGCGTCAAAG -TATTTAGCTGACTCGCCACACTCCACGGAAGCAATATGAAATGATCTGCTGCAGTGCTCTGAGCCCTAGG -ATTTATTTTTCTTTTCACCGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCGTA diff --git a/bio/metadmg/getdamage/wrapper.py b/bio/metadmg/getdamage/wrapper.py index 9b93732bf0c..98b61b0388d 100644 --- a/bio/metadmg/getdamage/wrapper.py +++ b/bio/metadmg/getdamage/wrapper.py @@ -13,28 +13,16 @@ ref = snakemake.input.get("ref", "") if ref: - ref = f"-f {ref}" + ref = f"--fasta {ref}" with tempfile.TemporaryDirectory() as tmpdir: shell( - "metaDMG-cpp getdamage --threads {snakemake.threads} {ref} {extra} -o {tmpdir}/out {snakemake.input.aln} {log}" + "metaDMG-cpp getdamage --threads {snakemake.threads} {ref} {extra} --out_prefix {tmpdir}/out {snakemake.input.aln} {log}" ) - if 
snakemake.output.get("dmg"): - assert snakemake.output.dmg.endswith( - ".gz" - ), "'dmg' file is Gzip compressed, but extension is not '.gz'" - shell("cat {tmpdir}/out.bdamage.gz > {snakemake.output.dmg}") - - if snakemake.output.get("res"): - assert snakemake.output.res.endswith( - ".gz" - ), "'res' file is Gzip compressed, but extension is not '.gz'" - shell("cat {tmpdir}/out.res.gz > {snakemake.output.res}") - - if snakemake.output.get("stats"): - assert not snakemake.output.stats.endswith( - ".gz" - ), "'stats' file is NOT Gzip compressed, but extension is '.gz'" - shell("cat {tmpdir}/out.stat > {snakemake.output.stats}") + for output in snakemake.output: + for ext in [".bdamage.gz", ".res.gz", ".stat"]: + if output.endswith(ext): + shell("cat {tmpdir}/out{ext} > {output}") + continue diff --git a/bio/metadmg/lca/environment.yaml b/bio/metadmg/lca/environment.yaml index 1d9b3558d4d..7370c2427b7 100644 --- a/bio/metadmg/lca/environment.yaml +++ b/bio/metadmg/lca/environment.yaml @@ -3,4 +3,4 @@ channels: - bioconda - nodefaults dependencies: - - metadmg =0.3 + - metadmg =0.4 diff --git a/bio/metadmg/lca/meta.yaml b/bio/metadmg/lca/meta.yaml index 87cb037f17f..60c6551893e 100644 --- a/bio/metadmg/lca/meta.yaml +++ b/bio/metadmg/lca/meta.yaml @@ -12,7 +12,6 @@ output: - dmg: path to TSV file containing counts of mismatchs conditional on strand and cycle. - lca: path to TSV file with LCA results. - stat: path to TSV file with general stats. - - log: path to log file. params: - extra: additional program arguments. 
notes: | diff --git a/bio/metadmg/lca/test/Snakefile b/bio/metadmg/lca/test/Snakefile index f002c575772..65cf4733b38 100644 --- a/bio/metadmg/lca/test/Snakefile +++ b/bio/metadmg/lca/test/Snakefile @@ -8,12 +8,11 @@ rule metadmg_lca: output: res="results/lca/{sample}.out.gz", lca="results/lca/{sample}.lca.gz", - stats="results/lca/{sample}.tsv", - log="results/lca/{sample}.log", + stats="stats/lca/{sample}.tsv", log: "logs/lca/{sample}.log", params: - extra="-simscorelow 0.95 -simscorehigh 1.0 -minmapq 30 -howmany 30 -lca_rank genus -fix_ncbi 0", + extra="--sim_score_low 0.95 --sim_score_high 1.0 --min_mapq 30 --how_many 30 --lca_rank genus --fix_ncbi 0", threads: 4 resources: mem_mb=1024, diff --git a/bio/metadmg/lca/test/acc2taxid.tsv b/bio/metadmg/lca/test/acc2taxid.tsv index 7211a71a306..562f43d62ab 100644 --- a/bio/metadmg/lca/test/acc2taxid.tsv +++ b/bio/metadmg/lca/test/acc2taxid.tsv @@ -1 +1,2 @@ -NC_023100.1 1425170 +accession accession.version taxid gi +NC_023100 NC_023100.1 1425170 1 diff --git a/bio/metadmg/lca/test/names.dmp.gz b/bio/metadmg/lca/test/names.dmp.gz index bb43fd5167bfd17231726f898946e77e38e7c2ae..c7fd91932eaed0693df51a932aa58f48a72fbcb8 100644 GIT binary patch literal 946 zcmV;j15NxNiwFQPIz?pw1C>`xZ`(K!KBvDzr#*EM>um!;fHdtUhXxxU#oos@Yq6$C zm7-F(=#SqaMM;(=QVC!fr01LWiw>{;TmfVFji36&1wWNne?EkGlR_x_`;HQ>=y$pc zIFtnGQJ!v)@TJ*-Ji#URh$<8i2Y^@rsawE-@GbCTuoGn|_(a#ytLy03)dLn>A{J+C z8ZN=jmO-ry_!cR{?G3DF{%GE@d)X?FjD00h@LM?T%kxlY<-QPd4{kRsY}dk!DgD{& zgr9LHHMh=;<$0oA`j1FOwi1mE?weOiN?0$JV2ZH=|3e@5<*U zNn-7lq~*|dgGVj3GE_Tz$j^qp0z+*VGAzovJ2v@T3)b9M$~lmx#+J#`^|WO!@XnN^ zk*=b9v|P%9a(6jot3-!D5>NqD{yMYZM^U20J&@o*!bvxGl+IXs0ba1+S@6RdYzTC= zN?S2vIXDB>qfJAKrUX7)esPpWI>O(Gl2D1gv+6sbvOsU-G+29iS90pvMI8(b%%2R6 z)vkOqy%{`%JQ5lF6+A-yi!i3svdO108nX@jH?}3`r&1h;sW$AN2D5bi)NZWcAVpJM zCai=0ThSWT9tGEX0I)G|6a4*BQdI=uW<6)x45u?v7App9J+&lS=_quq>6Jo(n{(audY?z`u%v8fb1|u6LazkBcpD4qY_sroqPR%Oc<8UX^>>T2d z*aANfK)2LnmE}6pPo_SWal>f0C-2DiaJ_3t2X(v2!b@7UbE+H0L8!O!!xA7{}{4(|;H~ 
z(~663Z(yutRX8=EEoo?~^O2w>Y)j+;pHJLda08ECn?=t^cuk$r43jutg_`S=kVKG$ USKkGE1n#r>Kdr(7&}R++00KkIfB*mh literal 969 zcmV;)12+60iwFo-?+axD18!k$WpgfMZEygUS6gq>Fcf|szamdOftE{?G}1I_4A>r+ zn6!!a)6_JkjvX9lsE8lWv2$zM*x972vU|RBzq!H1zYAatzwuKaxZsEK;@`UfZ&C== z{hm|875zzbk3&h29;R`IgfC4F@&KpY6RJ=^ECA90NZkTei!q#$V6iTP+Fo&NqU%m8a zQeRt~HUf@t)7ygMOU!R{K24B##{nIzl_*<(CihplWW@Y}KV<7oZL_LghwBuHPatzq zDIyO$N^NzLMXS`5sVa!1z`YX65?CjHD!0Ra>97kT9~tCG0Y=Z7wL?@r;uT5e-R^r)p)hT6>@^1Gogz);(Xj1JYgJ2m-O3)b8>$~lmx#+FIr z^?A#j;H@c%LtRC;Xt|UH<@Rz&{FM%YB%nN~+;!H0??s6cw?Mo*2?yQWVLW5$33$PR zN5OYTurbhyAGcz}a&ShhLz{*aO$mIq{Gu=pb%dXZl2D1Qwd#AI>VVG3X|VS4rsUMI zi+UIs7+DmlU3oUW={F93NOgkbEeI3S|inA#fYuXEeU;HB{v+K zWo9b%bTIU}?(7lWistTwbl?Z~J|AQ@Owk-O)i99W#D+7ufiAR9lwr#|=5Q3nW|ePo zxD#o14zWmVfuBd9UFynDQXT0BQy3K%lXxN-IP#wAr*3fI$^T33QbXfxycA}eOb2KQ%_jykp3P;Etx>=Xj zRS^34N?i|=nA<1uF;ZMU0>8Y&)@01q=)o#jCB`j$cof)yV#lboFC}CMO_FZ7Dx|0c z-4d9Q+Jc^FN$F1^%LRdYnBn4TzrCw|*PcJPkJ0W%?tvy^$5x(OkS3XF&U>hsaIe}R ziq=u0|1f-}6&L;9z*x(wuxdbC;=okrJwZ#@hR7X0pSZK&MjpL3i-D2wn%3SECW|OM rL%zo~k@+*L)GfkpY?S(e=K3Tg5oF=vcmkh*hl2PIQ)3| {snakemake.output.dmg}") - - if snakemake.output.get("lca"): - assert snakemake.output.lca.endswith( - ".gz" - ), "'lca' file is Gzip compressed, but has no '.gz' extension" - shell("cat {tmpdir}/out.lca.gz > {snakemake.output.lca}") - - if snakemake.output.get("stats"): - assert not snakemake.output.stats.endswith( - ".gz" - ), "'stats' file is NOT Gzip compressed, but has '.gz' extension" - shell("cat {tmpdir}/out.stat > {snakemake.output.stats}") - - if snakemake.output.get("log"): - assert not snakemake.output.log.endswith( - ".gz" - ), "'log' file is NOT Gzip compressed, but has '.gz' extension" - shell("cat {tmpdir}/out.log > {snakemake.output.log}") + for output in snakemake.output: + for ext in [".bdamage.gz", ".lca.gz", ".stat"]: + if output.endswith(ext): + shell("cat {tmpdir}/out{ext} > {output}") + continue diff --git a/test.py b/test.py index 93cebebb0f5..85b4ae2459b 100644 --- a/test.py +++ b/test.py 
@@ -151,7 +151,7 @@ def test_metadmg_getdamage(): "-F", "results/getdamage/a.out.gz", "results/getdamage/a.dmg.gz", - "results/getdamage/a.tsv", + "stats/getdamage/a.tsv", ], ) @@ -168,8 +168,24 @@ def test_metadmg_lca(): "-F", "results/lca/a.out.gz", "results/lca/a.lca.gz", - "results/lca/a.tsv", - "results/lca/a.log", + "stats/lca/a.tsv", + ], + ) + + +@skip_if_not_modified +def test_metadmg_dfit(): + run( + "bio/metadmg/dfit", + [ + "snakemake", + "--cores", + "1", + "--use-conda", + "-F", + "results/dfit/a.out.gz", + "stats/dfit/a.dfit.tsv.gz", + "stats/dfit/a.boot.tsv.gz", ], )