Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add support for seqkit split2 #3575

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions bio/seqkit/environment.linux-64.pin.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,43 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
# created-by: conda 24.9.0
# created-by: conda 24.9.2
@EXPLICIT
https://conda.anaconda.org/bioconda/linux-64/seqkit-2.9.0-h9ee0642_0.tar.bz2#14369282dc7a09cb3e117e4db9c9ddc2
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.8.30-hbcca054_0.conda#c27d1c142233b5bc9ca570c6e2e0c244
https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.12.14-hbcca054_0.conda#720523eb0d6a9b0f6120c16b2aa4e7de
https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_2.conda#048b02e3962f066da18efe3a21b77672
https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.13-5_cp313.conda#381bbd2a92c863f640a55b6ff3c35161
https://conda.anaconda.org/conda-forge/noarch/tzdata-2024b-hc8b5060_0.conda#8ac3367aafb1cc0a068483c580af8015
https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h77fa898_1.conda#cc3573974587f12dda90d96e3e55a702
https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h77fa898_1.conda#3cb76c3f10d3bc7f1105b2fc9db984df
https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.2-heb4867d_0.conda#2b780c0338fc0ffa678ac82c54af51fd
https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.4-hb9d3cd8_0.conda#e2775acf57efd5af15b8e3d1d74d72d3
https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.23-h4ddbbb0_0.conda#8dfae1d2e74767e9ce36d5fa0d8605db
https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.4-h5888daf_0.conda#db833e03127376d461e1e13e76f09b6c
https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_1.conda#e39480b9ca41323497b05492a63bc35b
https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.6.3-hb9d3cd8_1.conda#2ecf2f1c7e4e21fcfe6423a51a992d84
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-hc0a3c3a_1.conda#234a5554c53625688d51062645337328
https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8
https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda#4d638782050ab6faa27275bed57e9b4e
https://conda.anaconda.org/conda-forge/linux-64/openssl-3.4.0-hb9d3cd8_0.conda#23cc74f77eb99315c0360ec3533147a9
https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553
https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.21-h4bc722e_0.conda#36ce76665bf67f5aac36be7a0d21b7f3
https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055
https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe
https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-h4bc722e_0.conda#aeb98fdeb2e8f25d43ef71fbacbeec80
https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.47.2-hee588c1_0.conda#b58da17db24b6e08bcbf8fed2fb8c915
https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hf672d98_0.conda#be2de152d8073ef1c01b7728475f2fe7
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_1.conda#8371ac6457591af2cf6159439c1fd051
https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-he02047a_1.conda#70caf8bb6cf39a0b6b7efc885f51c0fe
https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0
https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1
https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.64.0-h161d5f1_0.conda#19e57602824042dfd0446292ef90488b
https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45
https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.10.1-hbbe4b11_0.conda#6e801c50a40301f6978c53976917b277
https://conda.anaconda.org/bioconda/linux-64/htslib-1.21-h5efdd21_0.tar.bz2#06b995dc2244c024b45bbb3e53ae2f27
https://conda.anaconda.org/conda-forge/linux-64/python-3.13.1-ha99a958_102_cp313.conda#6e7535f1d1faf524e9210d2689b3149b
https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.11.1-h332b0f4_0.conda#2b3e0081006dc21e8bf53a91c83a055c
https://conda.anaconda.org/conda-forge/noarch/pip-24.3.1-pyh145f28c_2.conda#76601b0ccfe1fe13a21a5f8813cb38de
https://conda.anaconda.org/bioconda/noarch/snakemake-wrapper-utils-0.6.2-pyhdfd78af_0.tar.bz2#fd8759bbd04116eace828c4fab906096
https://conda.anaconda.org/bioconda/linux-64/htslib-1.21-h566b1c6_1.tar.bz2#944598fba531a668e8fafea92ca39bb4
1 change: 1 addition & 0 deletions bio/seqkit/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ channels:
dependencies:
- seqkit =2.9.0
- htslib =1.21
- snakemake-wrapper-utils =0.6.2
19 changes: 19 additions & 0 deletions bio/seqkit/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,22 @@ rule seqkit_concat:
threads: 2
wrapper:
"master/bio/seqkit"


# Test rule for `seqkit split2`: splits one FASTA file into two parts.
# The wrapper renames the chunks produced by seqkit to the files listed in
# `output`, in order, so the list length must match the `--by-part` value.
rule seqkit_split2_part:
    input:
        fasta="data/{sample}.fa",
    output:
        fasta=[
            "out/split2/part/{sample}.1-of-2.fas",
            "out/split2/part/{sample}.2-of-2.fas",
        ],
    log:
        # Kept under "split2" to mirror the output directory layout.
        "logs/split2/part/{sample}.log",
    params:
        command="split2",
        extra="--by-part 2",
        # NOTE: `out_bgzip` is intentionally not set here; the wrapper does not
        # read it for the split/split2 subcommands and the outputs are plain
        # `.fas` files.
    threads: 2
    wrapper:
        "master/bio/seqkit"
62 changes: 52 additions & 10 deletions bio/seqkit/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,35 @@
__copyright__ = "Copyright 2023, Filipe G. Vieira"
__license__ = "MIT"

from snakemake.shell import shell
import tempfile
from pathlib import Path
from snakemake.shell import shell
from snakemake_wrapper_utils.snakemake import is_arg


extra = snakemake.params.get("extra", "")
log = snakemake.log_fmt_shell(stdout=False, stderr=True)


# Build the input part of the command line; how the input files are passed
# depends on the subcommand.
#
# Subcommands with multiple input files: hand every file over as-is.
if snakemake.params.command in ["concat", "common", "stats"]:
    input = " ".join(snakemake.input)
# Subcommands with a single input file: if more than one is provided, they
# are all concatenated with `cat` and passed through a process substitution.
# "stats" is not listed here because the branch above already handles it,
# so a second entry could never be reached.
elif snakemake.params.command in [
    "sum",
    "rmdup",
    "split",
    "split2",
    "sample",
    "sort",
]:
    input = "<(cat " + " ".join(snakemake.input) + ")"
# Any other subcommand only gets the first input file.
else:
    input = snakemake.input[0]


# Extra input
extra_input = " ".join(
[
(
Expand All @@ -26,6 +42,8 @@
][1:]
)


# Extra output
extra_output = " ".join(
[
(
Expand All @@ -38,15 +56,32 @@
)


# Build the output part of the command line.
if snakemake.params.command in ["split", "split2"]:
    # Work out the type of splitting from the flags given in `extra`.
    # The name is needed to pick the matching `--by-<name>-prefix` option.
    # Order matters: the first flag pair found in `extra` wins.
    split_flags = (
        ("id", ("-i", "--by-id")),
        ("part", ("-p", "--by-part")),
        ("region", ("-r", "--by-region")),
        ("size", ("-s", "--by-size")),
        ("length", ("-l", "--by-length")),
    )
    split_by = next(
        (
            name
            for name, flags in split_flags
            if any(is_arg(flag, extra) for flag in flags)
        ),
        None,
    )
    if split_by is None:
        # Fail with an explicit message instead of an unbound-variable error
        # when `extra` carries none of the splitting flags.
        raise ValueError(
            f"cannot determine how to split: 'extra' for 'seqkit "
            f"{snakemake.params.command}' must contain one of the "
            "--by-id, --by-part, --by-region, --by-size or --by-length flags"
        )

    # seqkit writes its chunks next to the first declared output, under a
    # fixed prefix and extension; the files are moved to their declared names
    # after the command has run.
    out_dir = Path(snakemake.output[0]).parent
    output = f"--out-dir {out_dir} --by-{split_by}-prefix output_part. --extension .fas"
elif snakemake.params.get("out_bgzip"):
    # Output is piped through bgzip, so the target must look compressed.
    assert Path(snakemake.output[0]).suffix in [
        ".gz",
        ".bgz",
        ".bgzip",
    ], "invalid output file extension"
    output = f"| bgzip --threads {snakemake.threads} > {snakemake.output[0]}"
else:
    output = f"--out-file {snakemake.output[0]}"


shell(
"(seqkit {snakemake.params.command}"
Expand All @@ -55,5 +90,12 @@
" {extra_output}"
" {extra}"
" {input}"
" {output}"
") {log}"
)


# Rename output files
# The chunks written by seqkit are moved, in order, onto the declared output
# files. Both paths are shell-quoted (`:q`) so that directories or file names
# containing spaces or shell metacharacters do not break the `mv` call.
# NOTE(review): this assumes seqkit numbers the chunks with a zero-padded,
# three-digit counter starting at 001; confirm that this also holds when
# splitting by id or by region.
if snakemake.params.command in ["split", "split2"]:
    for idx, output_file in enumerate(snakemake.output, start=1):
        chunk_file = f"{out_dir}/output_part.{idx:03d}.fas"
        shell("mv {chunk_file:q} {output_file:q}")
14 changes: 14 additions & 0 deletions test_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,20 @@ def test_seqkit_concat(run):
],
)

def test_seqkit_split2_part(run):
    """Run the bio/seqkit test workflow, requesting both split2 part files."""
    targets = [
        "out/split2/part/a.1-of-2.fas",
        "out/split2/part/a.2-of-2.fas",
    ]
    snakemake_cmd = ["snakemake", "--cores", "2", "--use-conda", "-F"]
    run("bio/seqkit", snakemake_cmd + targets)


def test_sickle_pe(run):
run(
Expand Down
Loading