Skip to content

Commit

Permalink
Deleted trailing Ns scripts and reformatted the VecScreen Python scripts with black
Browse files Browse the repository at this point in the history
  • Loading branch information
eeaunin committed Nov 16, 2023
1 parent 320e071 commit ddc2f30
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 218 deletions.
18 changes: 13 additions & 5 deletions bin/VSlistTo1HitPerLine.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import re
import argparse


def main(args):
hits_to_report = ""
ID = ""
Expand All @@ -27,14 +28,14 @@ def main(args):
line = line.strip()
Fld = line.split(" ")

if not re.match(r'^[0-9 \t]+$', line):
if not re.match(r"^[0-9 \t]+$", line):
hits_to_report = ""

if hits_to_report:
print(f"VecScreen_{hits_to_report.ljust(8)}\t{ID}\t{line}")
hits += 1
continue

if line.startswith(">Vector "):
if ID != "":
if error_found:
Expand Down Expand Up @@ -75,13 +76,20 @@ def main(args):

if __name__ == "__main__":
    # Command-line entry point: declare each option exactly once, parse, and
    # hand the resulting namespace to main() a single time.
    parser = argparse.ArgumentParser(description="Reformatting VecScreen's output")
    # Required positional argument. No `default` is given because argparse
    # never applies a default to a positional that has no `nargs`.
    parser.add_argument(
        "vecscreen_output_file",
        type=str,
        help="Path to a raw output file from NCBI VecScreen (from a run with the -f3 flag)",
    )
    # Independent boolean switches; each is False unless given on the command line.
    parser.add_argument("--skip_reporting_strong_hits", action="store_true", help="Skip reporting strong hits")
    parser.add_argument("--skip_reporting_moderate_hits", action="store_true", help="Skip reporting moderate hits")
    parser.add_argument("--skip_reporting_weak_hits", action="store_true", help="Skip reporting weak hits")
    parser.add_argument(
        "--skip_reporting_suspect_hits", action="store_true", help="Skip reporting hits of suspect origin"
    )
    parser.add_argument("--skip_reporting_no_hits", action="store_true", help="Skip reporting no-hits")
    parser.add_argument("--skip_reporting_errors", action="store_true", help="Skip reporting errors")
    parser.add_argument("-v", "--version", action="version", version="1.0")
    args = parser.parse_args()
    main(args)
52 changes: 27 additions & 25 deletions bin/chunk_assembly_for_vecscreen.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,42 +7,44 @@
import argparse
import os

def main(fasta_input_file, fasta_output_file):
    """Write an overlapping-chunk version of a FASTA file.

    Every record of ``fasta_input_file`` is read with ``SeqIO.parse`` and
    handled as follows:

    * records shorter than 11 positions are not written at all;
    * while more than 550,000 positions remain from the current chunk start,
      a chunk of 550,000 positions is emitted and the start advances by
      500,000, so consecutive chunks share 50,000 positions;
    * whatever remains becomes the final chunk.

    Chunks get ``.chunk_<n>`` (1-based) appended to the record id.  A record
    that needs no splitting is written under its original id.

    Args:
        fasta_input_file: Path of the FASTA file to read.
        fasta_output_file: Path of the FASTA file to create or overwrite.

    NOTE(review): ``SeqIO`` is not imported inside this block; it is
    presumably imported at module level (Biopython) -- confirm.
    """
    fasta_input_file = os.path.abspath(fasta_input_file)
    fasta_output_file = os.path.abspath(fasta_output_file)

    threshold_length = 500000
    overlap_length = threshold_length // 10
    minimum_record_size = 11

    # Both handles are managed by `with` so the output file is flushed and
    # closed even if parsing or writing raises part-way through.  The output
    # is opened first, as before.
    with open(fasta_output_file, "w") as fasta_output_handle, open(fasta_input_file, "r") as fasta_input_handle:
        for record in SeqIO.parse(fasta_input_handle, "fasta"):
            if len(record) < minimum_record_size:
                continue

            records_to_write = []
            slice_count = 0

            # Emit full-size chunks while more than one chunk-plus-overlap
            # remains after the current start position.
            while (slice_count * threshold_length) < len(record) - (threshold_length + overlap_length):
                chunk_start = slice_count * threshold_length
                chunk_end = (slice_count + 1) * threshold_length + overlap_length
                record_slice = record[chunk_start:chunk_end]
                record_slice.id += ".chunk_" + str(slice_count + 1)
                record_slice.description = ""
                records_to_write.append(record_slice)
                slice_count += 1

            final_record_slice = record[(slice_count * threshold_length) :]

            if slice_count > 0:
                final_record_slice.id += ".chunk_" + str(slice_count + 1)
                # NOTE(review): assumed to apply only to records that were
                # actually split; the indentation of this statement could not
                # be confirmed -- verify against the repository copy.
                final_record_slice.description = ""

            records_to_write.append(final_record_slice)

            SeqIO.write(records_to_write, fasta_output_handle, "fasta")


if __name__ == "__main__":
Expand Down
13 changes: 11 additions & 2 deletions bin/summarise_vecscreen_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,17 @@ def main(vecscreen_file, chunk_size):

if __name__ == "__main__":
    # Command-line entry point: declare each argument exactly once, parse,
    # and pass the two values to main().
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "vecscreen_file",
        type=str,
        help="Path to output file of VecScreen (run with -f3 flag), filtered with VSlistTo1HitPerLine.awk",
    )
    # The help text previously advertised 500000 while the code defaulted to
    # 50000.  500000 is the chunk length (threshold_length) used by
    # chunk_assembly_for_vecscreen.py, so the default is aligned with it and
    # the help text now interpolates the real default to keep the two in sync.
    parser.add_argument(
        "--chunk_size",
        type=int,
        help="Chunk size of the chunked FASTA file that VecScreen was run with, in bp. Default: %(default)s",
        default=500000,
    )
    parser.add_argument("-v", "--version", action="version", version="1.0")
    args = parser.parse_args()
    main(args.vecscreen_file, args.chunk_size)
144 changes: 0 additions & 144 deletions bin/trim_Ns.py

This file was deleted.

42 changes: 0 additions & 42 deletions modules/local/detect_trailing_n.nf

This file was deleted.

0 comments on commit ddc2f30

Please sign in to comment.