-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtqd
executable file
·170 lines (131 loc) · 6.44 KB
/
tqd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/usr/bin/env python
# wrapper that calls tqd-c (implementation in C)
import os
import sys
import argparse
import shlex
import shutil
import time
import subprocess
import glob
import multiprocessing
import tempfile
# position of the script -------------------------------------------------------
# realpath() resolves symlinks, so the helper binaries are looked up next to
# the real script file and not next to a link pointing at it.
path_mOTUs = os.path.realpath(__file__)
path_array = path_mOTUs.split("/")
# directory that contains this script, always terminated by a "/"
relative_path = "/".join(path_array[:-1]) + "/"
# check if setup has been run already ------------------------------------------
# this script only wraps the 'tqd-c' executable, which is expected to sit in
# the same directory; without it nothing can be computed, so stop right away.
if not os.path.isfile(relative_path + 'tqd-c'):
    sys.stderr.write("Error: Run './setup' before using tqd\n\n")
    raise SystemExit(1)
# message of the script --------------------------------------------------------
class CapitalisedHelpFormatter(argparse.HelpFormatter):
    """Help formatter that replaces a missing usage prefix with an empty one.

    The usage text given to the parser is therefore handed to the base
    formatter with an explicit ``''`` prefix whenever no prefix is supplied.
    """

    def add_usage(self, usage, actions, groups, prefix=None):
        """Forward to ``HelpFormatter.add_usage`` with ``''`` as default prefix.

        A prefix passed explicitly by the caller is forwarded unchanged.
        """
        effective_prefix = '' if prefix is None else prefix
        base_formatter = super(CapitalisedHelpFormatter, self)
        return base_formatter.add_usage(usage, actions, groups, effective_prefix)
def msg(name=None):
    """Return the usage/help text that is handed to the argument parser.

    The ``name`` argument is accepted but not used. The text starts with an
    empty line followed by a line holding a single NUL character (the
    ``\\00`` escape) and ends with a newline.
    """
    usage_lines = (
        '',
        '\00',
        'Program: tqd - threshold q-gram distance',
        'Usage: tqd <first_file> <second_file> [options]',
        'Options:',
        '-q INT value of q in the q-grams[10]',
        '-threshold INT value of the threshold [1]',
        '-strings set if the inputs are strings instead of files',
        '-pair_status FILE file to save the pair statuses',
        '-verbose INT verbose level: 1=error, 2=warning, 3=message, 4+=debugging [2]',
        '',
    )
    return '\n'.join(usage_lines)
# ------------------------------------------------------------------------------
# MAIN
# ------------------------------------------------------------------------------
def main(argv=None):
    """Parse the command line, size the inputs and run ``tqd-c``.

    Args:
        argv: list of command-line arguments (without the program name).
            ``None`` makes argparse read ``sys.argv[1:]``.

    Returns:
        0 on success. Every failure path terminates the process through
        ``sys.exit(1)`` instead of returning.
    """
    # parse input parameters ---------------------------------------------------
    parser = argparse.ArgumentParser(usage=msg(), formatter_class=CapitalisedHelpFormatter, add_help=False)
    parser.add_argument('first_file', action="store", default=None, help='first file/string')
    parser.add_argument('second_file', action="store", default=None, help='second file/string')
    parser.add_argument('-q', type=int, action="store", dest='q', default=10, help='value of q in the q-grams')
    parser.add_argument('-threshold', type=int, action="store", dest='threshold', default=1, help='value of the threshold')
    parser.add_argument('-strings', action='store_true', default=False, dest='input_strings', help='set if the inputs are strings instead of files')
    parser.add_argument('-pair_status', action='store', default=None, dest='pair_status', help='save the pair statuses')
    parser.add_argument('-verbose', action='store', type=int, default=2, dest='verbose', help='Verbose levels')
    # honour the argv parameter so that main() can be driven programmatically
    args = parser.parse_args(argv)

    strings = args.input_strings
    save_pair_status = args.pair_status is not None

    # find the maximum length for the strings ----------------------------------
    if args.verbose > 2:
        sys.stderr.write("check input files.\n")
    if strings:
        longest = max(len(args.first_file), len(args.second_file))
    else:
        # the helper program reports the length for each input file
        longest = max(_checked_input_length(args.first_file, args.verbose),
                      _checked_input_length(args.second_file, args.verbose))
    # the value handed to tqd-c is the longest input plus 10
    max_string_length = longest + 10

    # create a temporary file where to save the pair statuses ------------------
    # "no_pair_status" is the placeholder passed to tqd-c when nothing is saved
    pair_status_file = "no_pair_status"
    pair_status_temp_file = None
    if save_pair_status:
        pair_status_temp_file = tempfile.NamedTemporaryFile(delete=False, mode="w")
        os.chmod(pair_status_temp_file.name, 0o644)
        pair_status_file = pair_status_temp_file.name

    # call tqd-c ---------------------------------------------------------------
    # The command is an argument list (not a string that is split again), so
    # paths and input strings containing spaces or quotes reach tqd-c intact.
    call_tqd = [
        relative_path + 'tqd-c',
        str(args.q),
        str(args.threshold),
        "1" if strings else "2",   # 1 = inputs are strings, 2 = inputs are files
        args.first_file,
        args.second_file,
        str(args.verbose),
        str(max_string_length),
        pair_status_file,
    ]
    if args.verbose > 3:
        sys.stderr.write("call:\n" + " ".join(call_tqd) + "\n")
        sys.stderr.write("\n- inside tqd-c:\n")
    return_code = subprocess.Popen(call_tqd).wait()
    if return_code:
        if pair_status_temp_file is not None:
            # do not leave the (delete=False) temporary file behind on failure
            try:
                pair_status_temp_file.close()
                os.remove(pair_status_temp_file.name)
            except (IOError, OSError):
                pass
        sys.exit(1)

    # move the temp file with the statuses to the final destination ------------
    if pair_status_temp_file is not None:
        # close file
        try:
            pair_status_temp_file.flush()
            os.fsync(pair_status_temp_file.fileno())
            pair_status_temp_file.close()
        except (IOError, OSError, ValueError):
            if args.verbose > 1:
                sys.stderr.write("Error when closing pair_statuses file\n")
            sys.exit(1)
        # move to the final destination
        try:
            shutil.move(pair_status_temp_file.name, args.pair_status)
        except (IOError, OSError, shutil.Error):
            if args.verbose > 1:
                sys.stderr.write("Error when copying pair_statuses file to the final destination\n")
            sys.exit(1)

    return 0  # success


def _checked_input_length(input_path, verbose):
    """Run ``check_input_file`` on one input file and return the integer it prints.

    The helper is called as ``check_input_file <input_path> 2`` and the first
    line of its standard output is parsed as an integer. The process exits
    with status 1 when the helper fails or prints something that is not an
    integer.
    """
    call_check_file = [relative_path + 'check_input_file', input_path, "2"]
    if verbose > 3:
        sys.stderr.write("call:\n" + " ".join(call_check_file) + "\n")
    check_process = subprocess.Popen(call_check_file, stdout=subprocess.PIPE)
    # communicate() drains the pipe before waiting, so the helper can never
    # block on a full pipe while we wait for it to finish
    stdout_data, _ = check_process.communicate()
    if check_process.returncode:
        sys.exit(1)
    output_lines = stdout_data.decode("utf-8").splitlines()
    first_line = output_lines[0].rstrip() if output_lines else ""
    try:
        return int(first_line)
    except ValueError:
        if verbose > 1:
            sys.stderr.write("Error: unexpected output from check_input_file\n")
        sys.exit(1)
#-------------------------------- run main -------------------------------------
if __name__ == '__main__':
    # the value returned by main() becomes the exit status of the process
    sys.exit(main())