-
Notifications
You must be signed in to change notification settings - Fork 0
/
reprochart.py
executable file
·211 lines (206 loc) · 7.47 KB
/
reprochart.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import csv, itertools
import matplotlib.pyplot as plt
import matplotlib as mplt
import numpy as np
import argparse
import os
import io
# Convert to number
def to_number(x):
    """Convert *x* to an ``int`` if ``int()`` accepts it, else to a ``float``.

    Raises whatever ``float(x)`` raises (``ValueError`` for non-numeric
    text, ``TypeError`` for unsupported types) when neither conversion works.
    """
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures trigger the fallback (e.g. "0.3", "1e3",
        # or an infinite float); anything else, such as KeyboardInterrupt,
        # must propagate instead of being swallowed.
        return float(x)
# Convert string to float or NaN if it can't
def to_float(x):
    """Return *x* converted to ``float``, or NaN when it cannot be converted."""
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        # Unconvertible cells (empty strings, text, None, ...) become NaN so
        # they can be filtered out later; unrelated exceptions propagate.
        return np.nan
# Filter NaN points from the graph
def filter_nans(x, y):
    """Drop every point whose y value is NaN.

    *x* and *y* are paired element-wise.  Returns two parallel sequences
    (the surviving x values and the surviving y values), or ``([], [])``
    when no point survives.
    """
    # Build a real list: on Python 3 ``filter`` is lazy and has no ``len``.
    pairs = [(xi, yi) for xi, yi in zip(x, y) if not np.isnan(yi)]
    if not pairs:
        return ([], [])
    # Transpose [(x0, y0), (x1, y1), ...] into [(x0, x1, ...), (y0, y1, ...)].
    return list(zip(*pairs))
def utf_8_encoder(unicode_csv_data):
    """Lazily yield each text line of *unicode_csv_data* encoded as UTF-8."""
    for text_line in unicode_csv_data:
        encoded_line = text_line.encode('utf-8')
        yield encoded_line
def unicode_csv_reader(unicode_csv_data, dialect=csv.excel, **kwargs):
    """Yield the rows of text CSV input as lists of text cells.

    *unicode_csv_data* is an iterable of text lines.  *dialect* and any
    extra keyword arguments are passed straight to ``csv.reader``.
    """
    if str is not bytes:
        # Python 3: csv.reader consumes and produces text directly, and
        # rejects the byte strings the Python 2 work-around would feed it.
        for row in csv.reader(unicode_csv_data, dialect=dialect, **kwargs):
            yield row
        return
    # Python 2: csv.py doesn't do Unicode; encode temporarily as UTF-8 ...
    csv_reader = csv.reader(utf_8_encoder(unicode_csv_data),
                            dialect=dialect, **kwargs)
    for row in csv_reader:
        # ... and decode UTF-8 back to Unicode, cell by cell.
        yield [cell.decode('utf-8') for cell in row]
def _parse_option_value(raw_value):
    """Turn the text after ``key:`` into a string, a bool or a number."""
    value = raw_value.strip()
    # A value wrapped in matching quotes is taken literally as a string.
    if len(value) >= 2 and value[0] in ('"', "'") and value[0] == value[-1]:
        return value[1:-1]
    lowered = value.lower()
    if lowered in ('true', 'false'):
        # Compare the lowered text so "True" / "TRUE" are not read as False.
        return lowered == 'true'
    # Anything else has to be numeric; to_number raises ValueError otherwise.
    return to_number(value)


def _read_chart_file(infile):
    """Read *infile* and return ``(options, rows)``.

    The file starts with ``key: value`` option lines (blank lines allowed),
    followed by a line beginning with ``=``; everything after that line is
    parsed as CSV.  Raises ``ValueError`` for a malformed option line or
    when the separator line is missing.
    """
    options = {}
    with io.open(infile, 'r', encoding='utf-8') as csvfile:
        while True:
            line = csvfile.readline()
            if not line:
                # readline() returns '' only at end of file; without this
                # check a file lacking the separator would loop forever.
                raise ValueError('No separator line (===) found in the input file!')
            if line[0] == '=':
                break
            line = line.strip()
            if not line:
                continue
            if ':' not in line:
                raise ValueError('Above the line (===), only empty lines or key:value options are allowed!')
            # Split on the first colon only, so values may contain colons.
            key, _, value = line.partition(':')
            options[key.strip().lower()] = _parse_option_value(value)
        # The rest of the file is the CSV table; blank lines are ignored.
        rows = [row for row in unicode_csv_reader(csvfile) if row]
    return options, rows


def _print_latex_table(headers, x, ys):
    """Print the data as a LaTeX ``tabular`` with one row per series.

    The largest non-NaN value of every column is set in bold; NaN cells
    are left blank.
    """
    # One column for the series name plus one per x value.
    print(u'\\begin{tabular}{%s}' % ('c' * (len(x) + 1)))
    print(u'\\toprule')
    # The x labels are already text; joining them directly avoids the
    # Python 2 ``str()`` failure on non-ASCII labels.
    print(u'%s & %s \\\\' % (headers[0], u' & '.join(x)))
    print(u'\\midrule')
    best = [
        np.amax([-np.inf if np.isnan(y[i]) else y[i] for y in ys])
        for i in range(len(x))
    ]
    for i, y in enumerate(ys):
        cells = [' '] * len(x)
        for j, value in enumerate(y):
            if np.isnan(value):
                continue
            template = '\\textbf{%s}' if best[j] == value else '%s'
            cells[j] = template % str(value)
        # A dagger character in a series name is written as \dag.
        name = headers[i + 1].replace(u'\u2020', '\\dag')
        print(u'%s & %s \\\\' % (name, u' & '.join(cells)))
    print(u'\\bottomrule')
    print(u'\\end{tabular}')


def process_chart(infile, outfile):
    """Render the chart described by *infile* and save it to *outfile*.

    *infile* holds ``key: value`` options, a line starting with ``=``, and
    then a CSV table whose first row is the header, whose first column holds
    the x labels and whose remaining columns are the series to plot.  A LaTeX
    table of the same data is printed to standard output.

    Raises ``ValueError`` when the options are malformed, the separator line
    is missing, or the CSV part has no data to plot.
    """
    # Option keys:
    FIGHEIGHT = 'figheight'
    FIGWIDTH = 'figwidth'
    TITLE = 'title'
    XTITLE = 'xtitle'
    YTITLE = 'ytitle'
    FONTSIZE = 'fontsize'
    XFONTSIZE = 'xfontsize'
    YFONTSIZE = 'yfontsize'
    XTITLESIZE = 'xtitlesize'
    YTITLESIZE = 'ytitlesize'
    LEGENDSIZE = 'legendsize'
    XMARGIN = 'xmargin'
    YMARGIN = 'ymargin'
    XLOGSCALE = 'xlogscale'
    YLOGSCALE = 'ylogscale'
    YLIMMIN = 'ylimmin'
    YLIMMAX = 'ylimmax'
    YNUMTICKS = 'ynumticks'
    PADDING = 'padding'

    # Read the options and the rows of the CSV part
    options, rows = _read_chart_file(infile)

    # Set defaults if not found:
    fixed_defaults = (
        (FIGHEIGHT, 5),
        (FIGWIDTH, 5),
        (FONTSIZE, 11),
        (YMARGIN, 2),
        (XMARGIN, 0.3),
        (YLOGSCALE, False),
        (XLOGSCALE, False),
        (YLIMMIN, 10),
        (YLIMMAX, 100),
        (YNUMTICKS, 10),
        (PADDING, 0.1),  # default from Matplotlib code
    )
    for key, default in fixed_defaults:
        options.setdefault(key, default)
    # The specific font sizes fall back to the general font size.
    for key in (YFONTSIZE, XFONTSIZE, YTITLESIZE, XTITLESIZE, LEGENDSIZE):
        options.setdefault(key, options[FONTSIZE])

    # Convert the rows to headers, x values and the different y values
    if len(rows) < 2:
        raise ValueError('The CSV part needs a header row and at least one data row!')
    headers = rows[0]
    cols = list(zip(*rows[1:]))
    if len(cols) < 2:
        raise ValueError('The CSV part needs an x column and at least one series column!')
    x = cols[0]
    ys = [[to_float(cell) for cell in col] for col in cols[1:]]

    # Print LaTeX table
    _print_latex_table(headers, x, ys)

    # Set font size of axes
    mplt.rc('ytick', labelsize=options[YFONTSIZE])
    mplt.rc('xtick', labelsize=options[XFONTSIZE])
    # Set the figure size
    plt.figure(figsize=(options[FIGWIDTH], options[FIGHEIGHT]))
    # Set the titles
    if TITLE in options:
        plt.title(options[TITLE])
    if XTITLE in options:
        plt.xlabel(options[XTITLE], fontsize=options[XTITLESIZE])
    if YTITLE in options:
        plt.ylabel(options[YTITLE], fontsize=options[YTITLESIZE])

    # Plot the series using unique colors and markers
    cm = plt.get_cmap('nipy_spectral')
    markers = ['v', 'o', '*', 's', 'x', '^', '+']
    linestyles = ['-', '-.', '--']
    styles = itertools.cycle(itertools.product(linestyles, markers))
    # Take one style per series, then assign them in reverse order.
    chosen_styles = [next(styles) for _ in ys][::-1]
    # Series are plotted against the row index; the x labels become ticks.
    positions = list(range(len(x)))
    for i, (y, (linestyle, marker)) in enumerate(zip(ys, chosen_styles)):
        fx, fy = filter_nans(positions, y)
        plt.plot(fx, fy,
                 color=cm(float(i) / len(ys)),
                 linestyle=linestyle, marker=marker,
                 label=headers[i + 1])

    # Setup y axis
    plt.yticks(np.linspace(options[YLIMMIN], options[YLIMMAX],
                           num=options[YNUMTICKS], endpoint=True))
    plt.ylim(options[YLIMMIN] - options[YMARGIN],
             options[YLIMMAX] + options[YMARGIN])
    if options[YLOGSCALE]:
        # The y axis takes ``basey``; ``basex`` only applies to the x axis.
        # NOTE(review): newer matplotlib releases spell this keyword
        # ``base`` - confirm against the installed version.
        plt.yscale('log', basey=2)

    # Setup x axis
    plt.xticks(positions, x)
    plt.xlim(-options[XMARGIN], len(x) - 1 + options[XMARGIN])
    if options[XLOGSCALE]:
        plt.xscale('log', basex=2)

    # Set the figure to use a grid
    plt.grid()
    # Set the figure to use a legend and place it in the lower left corner
    plt.legend(loc='lower left', prop={'size': options[LEGENDSIZE]})
    # Save the plot to outfile using tight bounds around the figure
    plt.savefig(outfile, bbox_inches='tight', pad_inches=options[PADDING])
def main():
    """Parse the command line and render the requested chart.

    Takes a required ``infile`` and an optional ``--outfile``; when the
    latter is omitted the chart is written next to the input with a
    ``.pdf`` extension.  Raises ``ValueError`` when the input file does not
    exist, the output name is blank, or both names refer to the same file.
    """
    parser = argparse.ArgumentParser(description='Process csv files with special headers into pdf charts and latex tables.')
    # Required argument: the input file.
    parser.add_argument(
        "infile",
        help="Input file."
    )
    # Optional argument: the output file (derived from infile by default).
    parser.add_argument(
        "--outfile",
        help="Output file",
        default="",
    )
    args = parser.parse_args()
    args.infile = os.path.normpath(args.infile)
    if not os.path.isfile(args.infile):
        raise ValueError("Cannot find input file!")
    if args.outfile == "":
        filename = os.path.splitext(args.infile)[0]
        args.outfile = filename + '.pdf'
    if len(args.outfile.strip()) == 0:
        raise ValueError("Cannot use an empty string!")
    # Compare only after the default has been derived: otherwise an input
    # that already ends in .pdf would silently be overwritten by the chart.
    if os.path.abspath(args.infile) == os.path.abspath(args.outfile):
        raise ValueError("The infile and outfile cannot be the same!")
    process_chart(args.infile, args.outfile)


if __name__ == '__main__':
    main()