-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSelect_OG_general.py
96 lines (84 loc) · 2.58 KB
/
Select_OG_general.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
###################################################################################################
# Originally written by Sidonie BELLOT - RBG Kew - [email protected]
# Use and modify as you wish, but please don't hesitate to give feedback!
# Screen trees for a list of outgroups and report the tree name + the most distant outgroup found
# Outgroup list has to be ordered from the most distant (clade of) outgroups to the closest ones to the ingroup
# If anything is unclear, just ask me.
# Note: I think I have since replaced this script with a new R script.
################################################################################################################
import sys
from string import *
from Bio import SeqIO
import getopt
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from operator import itemgetter
import os
import re
# Positional command-line arguments:
#   1. inputdir - tree folder
#   2. inputTax - outgroup (OG) list
#   3. output   - OG table giving the OG found for each tree
inputdir, inputTax, output = sys.argv[1], sys.argv[2], sys.argv[3]
# Captures runs of ASCII digits; the capture group makes split() keep them.
_nsre = re.compile('([0-9]+)')


def natural_sort_key(s):
    """Return a sort key that compares embedded digit runs numerically.

    The string is split on runs of digits.  Digit runs become ints and
    every other piece is lower-cased, so that e.g. "g2" sorts before
    "g10" and comparison ignores case.
    """
    key = []
    for piece in _nsre.split(s):
        if piece.isdigit():
            key.append(int(piece))
        else:
            key.append(piece.lower())
    return key
# Number of lines skipped before the taxon labels start in each tree file.
# NOTE(review): presumably the NEXUS preamble ("#NEXUS", "begin taxa;",
# "dimensions ntax=N;", "taxlabels") - confirm against the real input files.
HEADER_LINES = 4

# Matches the taxon count declaration, e.g. "dimensions ntax=12;".
NTAX_PATTERN = re.compile(r"ntax\s*=\s*([0-9]+)", re.IGNORECASE)


def read_taxa(tree_path, header_lines=HEADER_LINES):
    """Return the list of taxon names declared in one tree file.

    The file must contain an "ntax=N" declaration; the N lines that follow
    the first ``header_lines`` lines are read as taxon labels.  Each label
    is taken from the second tab-separated field of its line (labels are
    expected to be tab-indented); a line without a tab falls back to its
    whitespace-stripped content.

    Raises:
        ValueError: if the file has no "ntax=" declaration.  Failing here
            avoids silently reusing the taxon count of a previous file.
    """
    with open(tree_path, "r") as handle:
        lines = handle.readlines()

    ntax = None
    for line in lines:
        found = NTAX_PATTERN.search(line)
        if found:
            # If several declarations exist, the last one wins.
            ntax = int(found.group(1))
    if ntax is None:
        raise ValueError("no 'ntax=' declaration found in %s" % tree_path)

    taxa = []
    for line in lines[header_lines:header_lines + ntax]:
        # Strip both LF and CRLF endings so names compare equal to the
        # outgroup list whatever platform produced the file.
        fields = line.rstrip("\r\n").split("\t")
        if len(fields) > 1:
            taxa.append(fields[1])
        else:
            taxa.append(fields[0].strip())
    return taxa


def match_outgroup(entry, taxa):
    """Return the outgroup label for one list entry, or None if absent.

    ``entry`` is either a single taxon name or a pair written "A ; B"
    (spaces around the semicolon are optional).  A pair matches only when
    both of its first two names are in ``taxa`` and is reported as "A;B";
    any further names on the line are ignored.
    """
    if ";" in entry:
        names = [name.strip() for name in entry.split(";")]
        first, second = names[0], names[1]
        if first and second and first in taxa and second in taxa:
            return first + ";" + second
        return None
    if entry and entry in taxa:
        return entry
    return None


if __name__ == "__main__":
    # Outgroup list, ordered from the most distant outgroup to the closest.
    with open(inputTax, "r") as handleTax:
        og_entries = [line.rstrip("\r\n") for line in handleTax if line.strip()]

    OG_tab = []
    for gene in os.listdir(inputdir):
        # Open the file from the folder that was actually listed.
        TAX = read_taxa(os.path.join(inputdir, gene))
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(TAX)

        label = None
        for OG in og_entries:
            print(OG)
            label = match_outgroup(OG, TAX)
            if label is not None:
                # First hit is the most distant outgroup present: stop here.
                break
            print("keep looking")

        if label is None:
            OG_tab.append(gene + "\tNOT-FOUND\n")
        else:
            print("OG was found")
            OG_tab.append(gene + "\t" + label + "\n")

    OG_tab.sort(key=natural_sort_key)
    # Append mode is kept so existing tables are extended, but the file is
    # opened once rather than once per row.
    with open(output, "a") as fo:
        fo.writelines(OG_tab)