-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse_output.py
120 lines (98 loc) · 4.59 KB
/
parse_output.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# -*- coding: utf-8 -*-
"""
parse_output.py
Parses Clingo[LP] output into Pickle, JSON and/or CSV
"""
import numpy as np
import pandas, re
from argparse import ArgumentParser
import pickle, sys, warnings
def get_flux_vectors(fname):
    """
    Reads the Clingo[LP] output and gets the flux vector for each solution
    TODO: Not use this function and get answers with clingo API instead

    Only lines starting with "(0.0," are treated as solutions; each one is
    expected to look like ``(0.0, {flux("name"): value, ...})``.

    Params:
        fname: input file name, output from Clingo[LP]
    Returns:
        fluxes: list of flux vectors (one numpy array per solution line)
        names: list of reaction or enzyme subset names, read from the
            last solution line of the file
    Raises:
        ValueError: 'unsat' when no solution line exists and some line
            contains 'UNSATISFIABLE', 'error' when no solution line exists
            otherwise; also raised by the parsing itself when a solution
            line has no closing '})' or a value is not a float
    """
    # The context manager guarantees the file is closed even when one of the
    # errors below is raised.
    with open(fname) as f:
        lines = f.readlines()

    # uses the fact that the min function is 0 and always returns 0
    # after beginning brace index
    bb_idx = len('(0.0, {')

    fluxes = []
    spl = None
    for l in lines:
        # this filtering is a bit too specific to be reused...
        if not l.startswith("(0.0,"):
            continue
        # end brace index
        eb_idx = l.rindex('})')
        # remove beginning and ending braces, then split the "name: value" pairs
        spl = l[bb_idx:eb_idx].split(",")
        fluxes.append(np.array([float(w.split(":")[1]) for w in spl]))

    if spl is None:
        # Clingo LP output format error (or no solution at all)
        raise ValueError('unsat' if any('UNSATISFIABLE' in x for x in lines) else 'error')

    # compiled once instead of once per name
    flux_re = re.compile(r'flux\(("?.*)"?\)')

    def get_word_in_quotes(e):
        # extracts the argument of flux(...), dropping surrounding double quotes
        res = flux_re.search(e)
        res = "None" if res is None else res.group(1)
        if res.startswith('"') and res.endswith('"'):
            res = res[1:-1]
        return res

    names = [get_word_in_quotes(e) for e in spl]
    return fluxes, names
def write_flux_vectors(inname, pklfile, jsonfile, csvfile, split_revs, normalize_max, normalize_biom):
    """
    Writes the flux vectors retrieved from a clingo[LP] output

    The vectors are stacked into a pandas DataFrame (one row per solution,
    columns sorted by name), optionally normalized, optionally merged with
    their reversed counterparts, then dumped to every requested format.

    Params:
        inname: input file name, output from Clingo[LP]
        pklfile: file to store flux modes in pickle format (None to skip)
        jsonfile: file to store flux modes in json format (None to skip)
        csvfile: file to store flux modes in csv format (None to skip)
        split_revs: do not merge reversible reaction fluxes
        normalize_max: if toggled, normalize output flux vectors by their maximum flux value
            (ignored when normalize_biom is given)
        normalize_biom: biomass reaction name, if exists, normalize output flux vectors by their biomass flux value
    """
    fluxes, names = get_flux_vectors(inname)
    fvs = np.vstack(fluxes)
    pfvs = pandas.DataFrame(data=fvs, columns=names)
    pfvs = pfvs.reindex(sorted(pfvs.columns), axis=1)

    if normalize_biom is not None:
        # row-wise division by the biomass flux
        # note: NaN/inf producing if flux(biom) == 0
        pfvs = pfvs.div(pfvs[normalize_biom], axis=0)
    elif normalize_max:
        # row-wise division by the largest flux of each vector
        pfvs = pfvs.div(pfvs.max(axis=1), axis=0)

    if not split_revs:
        # NOTE(review): columns are matched on the suffix 'rev' but four
        # characters are stripped, which presumably assumes a '_rev' suffix
        # -- confirm against the naming used upstream.
        is_rev = pfvs.columns.str.endswith('rev')
        names = pfvs.columns[~is_rev]
        revs = pfvs.columns[is_rev]
        # reversed fluxes count negatively towards the forward reaction
        drev = -pfvs[revs]  # type: pandas.DataFrame
        drev.columns = drev.columns.map(lambda x: x[:-4])
        # explicit copy: the frame is modified in place right after slicing
        pfvs = pfvs[names].copy()
        pfvs[drev.columns] += drev

    if pklfile is not None:
        with open(pklfile, 'wb') as pkl:
            pickle.dump(pfvs, pkl)
    if jsonfile is not None:
        with open(jsonfile, 'w') as js:
            pfvs.to_json(path_or_buf=js, orient='records')
    if csvfile is not None:
        # pandas requires newline='' on file objects handed to to_csv,
        # otherwise rows end up with doubled line endings on Windows
        with open(csvfile, 'w', newline='') as csv:
            pfvs.to_csv(path_or_buf=csv)
    if pklfile is None and jsonfile is None and csvfile is None:
        warnings.warn('Please specify a format to convert to (pickle, json, csv) along with a filename')
if __name__ == "__main__":
    # Command-line entry point: parse the options and convert the given
    # Clingo[LP] output file into the requested format(s).
    parser = ArgumentParser()
    parser.add_argument('infile', metavar='clingoLP.file', help='Input file name')
    parser.add_argument('--pickle', default=None, metavar='Pickle.file', help='Store flux modes with pickle')
    parser.add_argument('--json', default=None, metavar='JSON.file', help='Store flux modes in JSON format')
    parser.add_argument('--csv', default=None, metavar='CSV.file', help='Store flux modes in CSV format')
    parser.add_argument('--normalize-max', action='store_true', help='Normalize reaction fluxes by their maximum flux (toggle)')
    parser.add_argument('--normalize-biom', default=None, help='Normalize reaction fluxes by their biomass flux (specify name)')
    # the flag disables merging: fluxes are merged only when it is NOT given
    parser.add_argument('--split-revs', action='store_true', help='Do not merge reversible reaction fluxes (toggle)')
    opts = parser.parse_args()
    write_flux_vectors(opts.infile, opts.pickle, opts.json, opts.csv, opts.split_revs, opts.normalize_max, opts.normalize_biom)