-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbed_to_primers.py
109 lines (92 loc) · 2.89 KB
/
bed_to_primers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python
# coding: utf-8
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
import sys
import argparse
def get_primers(reference, bed_file):
df_bed = pd.read_csv(
bed_file,
header=None,
names=[
"chrom",
"chrom_start",
"chrom_end",
"name",
"score",
"strand",
"primer",
],
sep="\t",
)
df_bed["primer_seq"] = ""
df_bed["size"] = ""
df_bed["%gc"] = "UNK"
df_bed["tm"] = "UNK"
sequence = []
fasta_sequences = SeqIO.parse(open(reference), "fasta")
for seq in fasta_sequences:
sequence.append(str(seq.seq))
for i in range(0, df_bed.shape[0]):
if "LEFT" in df_bed["name"][i]:
primer_seq = sequence[0][df_bed["chrom_start"][i] : df_bed["chrom_end"][i]]
size = df_bed["chrom_end"][i] - df_bed["chrom_start"][i]
df_bed["primer"][i] = primer_seq
df_bed["size"][i] = size
if "RIGHT" in df_bed["name"][i]:
primer_seq = sequence[0][df_bed["chrom_start"][i] : df_bed["chrom_end"][i]]
primer_seq = Seq(primer_seq).reverse_complement()
size = df_bed["chrom_end"][i] - df_bed["chrom_start"][i]
df_bed["primer"][i] = primer_seq
df_bed["size"][i] = size
df_bed = df_bed[["name", "chrom", "primer", "size", "%gc", "tm"]]
df_bed.columns = [["name", "pool", "seq", "size", "%gc", "tm"]]
df_bed.to_csv(bed_file.replace("bed", "tsv"), index=None, sep="\t", na_rep="UNK")
### Write out fasta file of primers
with open(bed_file.replace("bed", "fasta"), "w") as out_fasta:
for i in range(0, df_bed.shape[0]):
out_fasta.write(
">" + df_bed.loc[i][0] + "\n" + str(df_bed.loc[i][2]) + "\n"
)
def parse_arguments(system_args):
"""
Parse command line arguments
"""
usage = """Takes a bed file and associated reference and creates primer sequence files.
Example usage:
bed_to_primers.py -i neb_vss1a_primer.bed -r NC_045512.2_sequence.fasta """
parser = argparse.ArgumentParser(
description=usage, formatter_class=argparse.RawTextHelpFormatter
)
parser.add_argument(
"-i",
"--bed_file",
metavar="STR",
dest="bed_file",
default=None,
required=True,
help="bed file.",
)
parser.add_argument(
"-r",
"--reference",
metavar="STR",
dest="reference",
default=None,
required=True,
help="Name of reference fasta file",
)
args = parser.parse_args(system_args)
return args
def main(args):
"""
Generate results
"""
# All args
bed_file = args.bed_file
reference = args.reference
get_primers(reference, bed_file)
if __name__ == "__main__":
args = parse_arguments(sys.argv[1:])
main(args)