-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtable.py
298 lines (243 loc) · 13.4 KB
/
table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
from bokeh.models.widgets.tables import HTMLTemplateFormatter
from copy import deepcopy
import math
import numpy as np
import param
import pandas as pd
from collections import defaultdict
import panel as pn
from scipy import stats
from experimentdata import ExperimentData
from problemtable import ProblemTablereport
from report import Report
# TODO: is replacing 0 with something like 0.0001 a valid approach for gmean?
# TODO: fix warnings for gmean
class Tablereport(Report):
    """Foldable table of attribute values with one column per algorithm.

    The underlying table is indexed by ``(attribute, domain, problem)``.
    Besides the raw experiment data it holds aggregate rows, marked with the
    placeholder ``"--"``: ``(attribute, "--", "--")`` is the aggregate over
    all selected domains and ``(attribute, domain, "--")`` the aggregate of
    one domain. Clicking an aggregate row folds/unfolds the rows below it;
    clicking a problem row opens a ``ProblemTablereport`` popup.
    """

    # Attributes selected for display.
    attributes = param.ListSelector()
    # Domains selected for display and used for the overall aggregates.
    domains = param.ListSelector()
    # Number of decimals used when rendering non-integer numbers.
    precision = param.Integer(label="Floating point precision", default=3)

    # CSS passed to the Tabulator widget (selection and hover colors).
    stylesheet = """
.tabulator-row.tabulator-selected .tabulator-cell{
background-color: #9abcea !important;
}
.tabulator-row:hover {
background-color: #bbbbbb !important;
}
.tabulator .tabulator-row.tabulator-selectable:hover .tabulator-cell{
background-color: #769bcc !important;
}
"""

    def __init__(self, experiment_data=None, **params):
        """Create the report, its Tabulator view and its parameter panel.

        Args:
            experiment_data: The ``ExperimentData`` to show. When omitted, a
                fresh empty ``ExperimentData`` is created for this instance
                (instead of sharing one default object between instances).
            **params: Forwarded to ``Report.__init__``.
        """
        if experiment_data is None:
            experiment_data = ExperimentData()
        super().__init__(experiment_data, **params)

        # Which attributes are unfolded, and per unfolded attribute which
        # of its domains are unfolded: {attribute: [domain, ...]}.
        self.unfolded = dict()
        # Per attribute: the aggregator used and whether the domain
        # aggregates are up to date. The extra keys "__columns" and
        # "__domains" remember the columns/domains of the last aggregation.
        self.computed = dict()
        # Same as self.experiment_data.data.dropna().
        self.exp_data_dropna = pd.DataFrame()
        # Experiment data with aggregates, should be used as base data.
        self.table = pd.DataFrame()
        # Used to find out if we need to reapply formatters.
        self.previous_precision = -1

        # ajaxLoader false is set to reduce blinking (https://github.com/olifolkerd/tabulator/issues/1027)
        # NOTE(review): the value passed is the *string* "False", not a
        # boolean -- confirm that Tabulator treats it as "disabled".
        self.data_view = pn.widgets.Tabulator(
            value=self.table, disabled=True, show_index=False,
            pagination="remote", page_size=10000, frozen_columns=['Index'],
            sizing_mode='stretch_both', configuration={"ajaxLoader": "False"},
            sortable=False, stylesheets=[Tablereport.stylesheet]
        )
        self.data_view.add_filter(self.filter)
        self.data_view.style.apply(func=self.style_table_by_row, axis=1)
        self.data_view.on_click(self.on_click_callback)

        label_styles = {'font-size': '10pt', 'font-family': 'Arial', 'padding-left': '10px'}
        self.param_view = pn.Column(
            pn.Param(self.param.precision),
            pn.pane.HTML("Attributes", styles=label_styles),
            pn.widgets.CrossSelector.from_param(
                self.param.attributes, definition_order=False, width=475,
                styles={'padding-left': '10px'}),
            pn.pane.HTML("Domains", styles=label_styles),
            pn.widgets.CrossSelector.from_param(
                self.param.domains, definition_order=False, width=475,
                styles={'padding-left': '10px'}),
            width=500
        )

    def set_experiment_data_dependent_parameters(self):
        """Reset all cached state and rebuild the table for new experiment data.

        Returns:
            The parameter updates returned by the parent class, extended by
            ``"attributes"`` and ``"domains"`` (all of them selected).
        """
        param_updates = super().set_experiment_data_dependent_parameters()

        # Reset fields.
        self.exp_data_dropna = self.experiment_data.data.dropna()
        self.table = pd.DataFrame()
        self.unfolded = dict()
        self.computed = {
            attribute: {"aggregator": None, "domains_outdated": True}
            for attribute in self.experiment_data.numeric_attributes
        }
        self.computed["__columns"] = []
        self.computed["__domains"] = []
        self.previous_precision = -1

        self.param.attributes.objects = self.experiment_data.attributes
        self.param.attributes.default = self.experiment_data.attributes
        self.param.domains.objects = self.experiment_data.domains
        self.param.domains.default = self.experiment_data.domains
        param_updates["attributes"] = self.experiment_data.attributes
        param_updates["domains"] = self.experiment_data.domains

        # Build the rows for the aggregated values such that we later just
        # overwrite values rather than concatenate.
        mi = pd.MultiIndex.from_product(
            [self.experiment_data.attributes, ["--", *self.experiment_data.domains], ["--"]],
            names=["attribute", "domain", "problem"])
        aggregated_data_skeleton = pd.DataFrame(
            data="", index=mi, columns=self.experiment_data.algorithms)

        # Combine experiment data and aggregated data skeleton.
        self.table = pd.concat([self.experiment_data.data, aggregated_data_skeleton]).sort_index()

        # Add Index column (solely used in the visualization): the attribute
        # for attribute aggregates, the domain for domain aggregates and the
        # problem otherwise.
        pseudoindex = [
            x[0] if x[1] == "--" else (x[1] if x[2] == "--" else x[2])
            for x in self.table.index
        ]
        self.table.insert(0, "Index", pseudoindex)
        return param_updates

    def update_algorithm_names(self, mapping):
        """Rename algorithm columns according to ``mapping`` (old -> new).

        Names missing from ``mapping`` are kept unchanged, matching the
        behavior of ``DataFrame.rename``.
        """
        self.table.rename(columns=mapping, inplace=True)
        self.computed["__columns"] = [mapping.get(x, x) for x in self.computed["__columns"]]

    def get_view_table(self):
        """Return the table that is handed to the Tabulator widget."""
        return self.table

    def get_current_columns(self):
        """Return the algorithm columns that the aggregates are based on."""
        return self.experiment_data.algorithms

    def style_table_by_row(self, row):
        """Return one CSS string per cell of ``row``.

        Aggregate rows are rendered bold on a grey background, and the text
        of the Index cell is indented for domain and problem rows.
        """
        # Give aggregates a different style, and indent Index col text if it's a domain or problem.
        style = [""] * len(row)
        if row.name[1] == "--":
            style = [x + "font-weight: bold; background-color: #E6E6E6;" for x in style]
        elif row.name[2] == "--":
            style = [x + "font-weight: bold; background-color: #F6F6F6;" for x in style]
            style[0] = style[0] + "text-indent:25px;"
        else:
            style[0] = "text-indent:50px;"
        return style

    def filter(self, df):
        """Reduce ``df`` to the rows visible under the current folding state.

        Always visible are the aggregate rows of the selected attributes.
        For unfolded attributes the domain aggregates of the selected
        domains are added, and for unfolded domains their problem rows.
        As a side effect the width of the Index column is adapted to the
        longest visible entry.

        Returns:
            The visible rows of ``df``; an empty frame if nothing is selected.
        """
        if df.empty:
            return df

        attributes = self.attributes or []
        domains = self.domains or []
        selected_attributes = set(attributes)
        selected_domains = set(domains)

        indices = [(a, "--", "--") for a in attributes]
        for a, doms in self.unfolded.items():
            if a not in selected_attributes:
                continue
            indices += [(a, d, "--") for d in domains]
            for d in doms:
                if d not in selected_domains:
                    continue
                indices += [(a, d, p) for p in self.experiment_data.problems[d]]

        # Nothing selected: return an empty frame instead of failing on max().
        if not indices:
            return df.iloc[0:0]

        indices.sort()
        visible = df.loc[indices]
        max_length = max(len(x) for x in visible['Index'])
        self.data_view.widths = {'Index': 10 + max_length * 7}
        return visible

    def on_click_callback(self, e):
        """Handle a click on a table row.

        A problem row opens a ``ProblemTablereport`` popup, a domain
        aggregate (un)folds that domain and an attribute aggregate (un)folds
        that attribute.

        Args:
            e: Click event; ``e.row`` is used as positional index into
                ``self.table``.
        """
        row = self.table.iloc[e.row]
        attribute, domain, problem = row.name[0:3]

        # clicked on concrete problem -> open problem wise report
        if problem != "--":
            param_dict = {
                "domain": domain,
                "problem": problem,
                "algorithms": self.get_current_columns()
            }
            problem_report = ProblemTablereport(self.experiment_data, param_dict,
                                                sizing_mode="stretch_width")
            self.add_popup(problem_report.view_data, name=f"{domain} - {problem}")
            return

        # clicked on domain aggregate -> (un)fold that domain for that attribute
        if domain != "--":
            if domain in self.unfolded[attribute]:
                self.unfolded[attribute].remove(domain)
            else:
                self.unfolded[attribute].append(domain)
        # clicked on attribute aggregate -> (un)fold that attribute
        else:
            if attribute in self.unfolded:
                self.unfolded.pop(attribute)
            else:
                self.unfolded[attribute] = []
                # The domain rows become visible now, so make sure their
                # aggregates exist.
                if attribute in self.experiment_data.numeric_attributes:
                    self.aggregate_domains_for_attribute(attribute)

        self.view_data()
        # Setting the selection makes the redrawn table jump to that row.
        # TODO: unfortunately this results in blinking, can we do better?
        self.data_view.selection = []
        self.data_view.selection = [e.row]

    def aggregate_where_necessary(self):
        """Recompute the attribute aggregates that are out of date.

        An aggregate is recomputed if the current columns, the selected
        domains or the aggregator of the attribute changed since the last
        call. Domain aggregates are only recomputed for unfolded attributes;
        for all others they are marked as outdated.
        """
        current_columns = self.get_current_columns()
        columns_outdated = current_columns != self.computed["__columns"]
        domains_outdated = self.domains != self.computed["__domains"]
        # Store copies so that in-place changes of the live lists are still
        # detected by the comparisons above on the next call.
        self.computed["__columns"] = deepcopy(current_columns)
        self.computed["__domains"] = deepcopy(self.domains)

        # If the columns used for aggregation are outdated, recompute dropna to only consider current columns.
        if columns_outdated:
            unique_columns = list(dict.fromkeys(current_columns))
            if not unique_columns:
                return
            self.exp_data_dropna = self.experiment_data.data[unique_columns].dropna()

        cols_without_index = self.table.columns[1:]
        for attribute in self.experiment_data.numeric_attributes:
            aggregator = self.experiment_data.attribute_info[attribute].aggregator
            if (not columns_outdated and not domains_outdated
                    and aggregator == self.computed[attribute]["aggregator"]):
                continue
            self.computed[attribute]["aggregator"] = aggregator

            # Compute the overall aggregate.
            attribute_data = None
            index_string = f"{attribute} ({aggregator}, "
            num_problems = 0
            if attribute not in self.exp_data_dropna.index:
                new_aggregates = np.nan
            else:
                attribute_data = self.exp_data_dropna.loc[attribute]
                attribute_data = attribute_data.loc[
                    attribute_data.index.get_level_values('domain').isin(self.domains)]
                attribute_data = attribute_data.apply(pd.to_numeric, errors='coerce')
                num_problems = len(attribute_data.index)
                # Since gmean is not a built-in function we need to set the variable to the actual function here.
                if aggregator == "gmean":
                    aggregator = stats.gmean
                    # Zeros are replaced by a small positive value before
                    # taking the geometric mean.
                    attribute_data = attribute_data.replace(0, 0.000001)
                new_aggregates = attribute_data.agg(aggregator)

            self.table.loc[(attribute, "--", "--"), cols_without_index] = new_aggregates
            self.table.loc[(attribute, "--", "--"), "Index"] = (
                index_string + f"{num_problems}/{self.experiment_data.num_problems})")

            self.computed[attribute]["domains_outdated"] = True
            if attribute in self.unfolded:
                self.aggregate_domains_for_attribute(attribute, attribute_data)

    def aggregate_domains_for_attribute(self, attribute, attribute_data=None):
        """Recompute the per-domain aggregate rows of ``attribute``.

        Args:
            attribute: The attribute whose domain aggregates are recomputed.
            attribute_data: Optional numeric data of the attribute, indexed
                by (domain, problem). If omitted it is taken from
                ``self.exp_data_dropna``. The passed frame is not modified.
        """
        domains = self.experiment_data.domains
        # Without domains there are no domain aggregate rows to fill.
        if len(domains) == 0:
            return

        # Represents the slice of all domain aggregate rows, but without the Index column.
        rows, cols = (attribute, slice(domains[0], domains[-1]), "--"), self.table.columns[1:]

        # This can happen if there are no problems where all columns have a value for the attribute.
        if attribute not in self.exp_data_dropna.index:
            self.table.loc[rows, cols] = np.nan
            return

        if attribute_data is None:
            attribute_data = self.exp_data_dropna.loc[attribute].apply(pd.to_numeric, errors='coerce')

        aggregator = self.experiment_data.attribute_info[attribute].aggregator
        # Since gmean is not a built-in function we need to set the variable to the actual function here.
        if aggregator == "gmean":
            aggregator = stats.gmean
            # Not in place: the frame may belong to the caller.
            attribute_data = attribute_data.replace(0, 0.000001)

        # Clear the slice and apply combine_first (this way, the newly aggregated data is taken wherever it exists).
        self.table.loc[rows, cols] = np.nan
        self.table.loc[rows, cols] = self.table.loc[rows, cols].combine_first(attribute_data.groupby(level=0).agg(aggregator))

        # Show for every domain how many of its problems were aggregated.
        aggregated_domains = attribute_data.index.get_level_values(0)
        for domain in domains:
            num_problems = len(self.experiment_data.problems[domain])
            num_aggregated = 0 if domain not in aggregated_domains else len(attribute_data.loc[domain].index)
            self.table.loc[(attribute, domain, "--"), 'Index'] = f"{domain} ({num_aggregated}/{num_problems})"

        self.computed[attribute]["domains_outdated"] = False

    def update_data_view(self):
        """Refresh the aggregates and push the table to the Tabulator widget.

        The cell formatters are only rebuilt when the precision changed
        since the last call.
        """
        self.aggregate_where_necessary()
        new_table = self.get_view_table()

        # we need to define formatters before setting the new table as new value
        if self.precision != self.previous_precision:
            self.previous_precision = self.precision
            # Right-aligns numbers; non-integers are shown with `precision`
            # decimals, everything that is not a number is shown as is.
            template = f"""
<%= function formatnumber() {{
f_val = parseFloat(value);
if (!isNaN(f_val)) {{
if (Number.isInteger(f_val)) {{
return '<div style="text-align:right">' + f_val + "</div>";
}} else {{
return '<div style="text-align:right">' + f_val.toFixed({self.precision}) + "</div>";
}}
}} else {{
return value;
}}
}}() %>
"""
            self.data_view.formatters = {x: HTMLTemplateFormatter(template=template) for x in new_table.columns}

        self.data_view.value = new_table

    def get_params_as_dict(self):
        """Return the parameters as dict, with selections stored as indices.

        The selected attributes and domains are replaced by their positions
        in the respective list of selectable objects to shorten the result.
        """
        params = super().get_params_as_dict()
        # shorten the attributes parameter by using indices instead of the attribute names
        if "attributes" in params:
            params["attributes"] = [self.param.attributes.objects.index(a) for a in params["attributes"]]
        if "domains" in params:
            params["domains"] = [self.param.domains.objects.index(d) for d in params["domains"]]
        return params

    def set_params_from_dict(self, params):
        """Translate index-based selections in ``params`` back to names.

        Inverse of the shortening done in ``get_params_as_dict``. ``params``
        is modified in place and nothing is returned.
        NOTE(review): presumably the caller applies ``params`` afterwards --
        confirm against ``Report``.
        """
        if "attributes" in params:
            params["attributes"] = [self.param.attributes.objects[x] for x in params["attributes"]]
        if "domains" in params:
            params["domains"] = [self.param.domains.objects[x] for x in params["domains"]]