__author__ = 'Jihyun Park'
__email__ = '[email protected]'

import numpy as np
from utils import get_lab_arr, save_sq_mat_with_labels, get_marginals
from sklearn.metrics import precision_score, recall_score, roc_auc_score, f1_score, confusion_matrix


def R_precision(true_y, pred_y):
    """
    Given two flat np.arrays, calculate the R-precision score:
    the fraction of relevant items among the R highest-ranked predictions,
    where R is the number of relevant items in `true_y`.

    Parameters
    ----------
    true_y : np.array
        True y's (true binary labels)
    pred_y : np.array
        Predicted y's (predicted labels or scores used for ranking)

    Returns
    -------
    float
    """
    R = np.sum(true_y)
    trueidxs = np.where(true_y)[0]
    # Indices of the R items ranked highest by the predictions
    retrieved_R_docs = np.argsort(pred_y)[::-1][:int(R)]
    n_true = len(set(retrieved_R_docs).intersection(set(trueidxs)))
    return n_true / float(R)
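
# Illustrative usage (not part of the original file): with two relevant items
# (R = 2), only one of the two highest-ranked predictions is relevant, so
#
#     R_precision(np.array([1, 0, 1, 0]), np.array([0.9, 0.1, 0.3, 0.8]))
#
# returns 0.5.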


def get_accuracy(true_y, pred_y):
    """
    Given two lists or np.arrays, calculate the accuracy.

    Parameters
    ----------
    true_y : np.array
        True y's (true labels)
    pred_y : np.array
        Predicted y's (predicted labels)

    Returns
    -------
    float
        Accuracy as a percentage (0-100).
    """
    assert len(true_y) == len(pred_y)
    # Number of positions where the two label vectors agree
    numcorr = lambda a, b: np.where(np.array(a) == np.array(b))[0].shape[0]
    return numcorr(true_y, pred_y) / float(len(true_y)) * 100.0
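
# Illustrative usage (not part of the original file): three of the four
# predictions match, so
#
#     get_accuracy(np.array([0, 1, 2, 1]), np.array([0, 1, 1, 1]))
#
# returns 75.0 (a percentage, not a fraction).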


def get_accuracy_per_lab(true_y, pred_y, n_labels):
    """
    Given two np.arrays, calculate the accuracy per label.
    Returns a list of accuracies of size n_labels, along with the number
    of instances per label.
    `true_y` and `pred_y` should have values ranging from 0 to n_labels-1.

    Parameters
    ----------
    true_y : np.array
        True y's (true labels)
    pred_y : np.array
        Predicted y's (predicted labels)
    n_labels : int
        Number of labels

    Returns
    -------
    (list[float], np.array)
        Per-label accuracies and per-label instance counts.
    """
    true_y_arr = get_lab_arr(true_y, n_labels)
    yhat_arr = get_lab_arr(pred_y, n_labels)
    accs = []
    # Number of instances (utterances) per label in the true labels
    n_utter = np.sum(true_y_arr, axis=0)
    for tidx in range(n_labels):
        accs.append(get_accuracy(true_y_arr[:, tidx], yhat_arr[:, tidx]))
    return accs, n_utter
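
# Illustrative usage (not part of the original file), assuming `get_lab_arr`
# one-hot encodes a label vector into an (n_samples, n_labels) matrix:
#
#     accs, n_utter = get_accuracy_per_lab(np.array([0, 1, 1, 2]),
#                                          np.array([0, 1, 2, 2]), 3)
#
# gives per-label accuracies accs = [100.0, 75.0, 75.0] and per-label counts
# n_utter = [1, 2, 1].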


def get_binary_classification_scores(true_y, pred_y, n_labels):
    """
    Given two np.arrays, calculate per-label binary scores,
    and return them as a dictionary of arrays of size n_labels.
    `true_y` and `pred_y` should have values ranging from 0 to n_labels-1.

    Parameters
    ----------
    true_y : np.array
        True y's (true labels)
    pred_y : np.array
        Predicted y's (predicted labels)
    n_labels : int
        Number of labels

    Returns
    -------
    dict[str, np.array]
        Dictionary with keys "precision", "recall", "auc", "rprecision", "f1score".
        Values are np.arrays of size `n_labels`.
    """
    true_y_arr = get_lab_arr(true_y, n_labels)
    yhat_arr = get_lab_arr(pred_y, n_labels)
    precisions = []
    recalls = []
    aucs = []
    rprecisions = []
    fscores = []
    for tidx in range(n_labels):
        # Each label is scored as a one-vs-rest binary classification problem.
        if sum(true_y_arr[:, tidx]) == 0:
            print("WARNING: label index %d has 0 instances in the data. Binary scores for this label are set to 0.0" % tidx)
            precisions.append(0.0)
            recalls.append(0.0)
            fscores.append(0.0)
            aucs.append(0.0)
            rprecisions.append(0.0)
        else:
            precisions.append(precision_score(true_y_arr[:, tidx], yhat_arr[:, tidx]))
            recalls.append(recall_score(true_y_arr[:, tidx], yhat_arr[:, tidx]))
            fscores.append(f1_score(true_y_arr[:, tidx], yhat_arr[:, tidx]))
            aucs.append(roc_auc_score(true_y_arr[:, tidx], yhat_arr[:, tidx]))
            rprecisions.append(R_precision(true_y_arr[:, tidx], yhat_arr[:, tidx]))
    return {"precision": np.array(precisions), "recall": np.array(recalls),
            "auc": np.array(aucs), "rprecision": np.array(rprecisions), "f1score": np.array(fscores)}


def get_overall_scores_in_diff_metrics(true_y, pred_y, tr_doc_label_mat):
    """
    Get all the scores: the accuracy, plus both the weighted and the
    unweighted averages of all available binary scores.

    Parameters
    ----------
    true_y : np.array
        True y's (true labels)
    pred_y : np.array
        Predicted y's (predicted labels)
    tr_doc_label_mat : np.array
        Document-label matrix for the training data

    Returns
    -------
    dict[str, float]
    """
    n_states = tr_doc_label_mat.shape[1]
    # Label marginals of the training data, used as weights for the weighted averages
    marginals = get_marginals(tr_doc_label_mat)
    results = {}
    acc = get_accuracy(true_y, pred_y)
    results["accuracy"] = acc
    scores = get_binary_classification_scores(true_y, pred_y, n_states)
    for sc in sorted(scores.keys()):
        weighted = get_weighted_avg(scores[sc], marginals)
        notweighted = np.mean(scores[sc])
        results[sc + "_w"] = weighted
        results[sc] = notweighted
    return results
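
# Illustrative usage (not part of the original file), assuming `get_marginals`
# returns the per-label marginal probabilities of the training document-label
# matrix: each binary metric appears twice in the result, once averaged
# uniformly over labels (e.g. "f1score") and once weighted by the training
# marginals (e.g. "f1score_w"), alongside the overall "accuracy".
#
#     results = get_overall_scores_in_diff_metrics(dev_y, dev_yhat, tr_doc_label_mat)
#     results["f1score"], results["f1score_w"]
#
# where `dev_y`, `dev_yhat`, and `tr_doc_label_mat` are hypothetical held-out
# labels, predictions, and the training document-label matrix.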


def print_row_of_diff_metrics(model_name, result_numbers, headers=None, filename="./overall_result.csv"):
    """
    Append one CSV row of scores for `model_name` to `filename`,
    and echo the header and the row to stdout.
    """
    if headers is None:
        bin_metrics = ["precision", "recall", "auc", "rprecision", "f1score"]
        headers = ["model", "accuracy"] + [met + "_w" for met in bin_metrics] + bin_metrics
    print(",".join(headers))
    with open(filename, 'a') as f:
        f.write(model_name)
        print(model_name, end="")
        for met in headers[1:]:
            f.write(",%.4f" % result_numbers[met])
            print(",%.4f" % result_numbers[met], end="")
        f.write("\n")
        print()
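
# Illustrative usage (not part of the original file): `result_numbers` is a
# dict such as the one returned by get_overall_scores_in_diff_metrics, and the
# same comma-separated row that is appended to ./overall_result.csv is echoed
# to stdout, e.g.
#
#     print_row_of_diff_metrics("crf_baseline", results)
#
# where "crf_baseline" is a hypothetical model name.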


def get_weighted_avg(score_list, weights):
"""
Get weighted average of scores using the weights.
"""
return np.dot(score_list, weights)
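
# Illustrative usage (not part of the original file): a plain dot product, so
# the weights are expected to sum to 1, e.g.
#
#     get_weighted_avg(np.array([0.5, 1.0]), np.array([0.75, 0.25]))
#
# returns 0.625.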


def get_weighted_avg_from_ymat(score_list, doc_label_mat):
    """
    Get the weighted average, where the weights are the marginal probabilities of the training data.
"""
weights = get_marginals(doc_label_mat)
weighted_avg = np.dot(score_list, weights)
return weighted_avg


def save_confusion_matrix(true_y, pred_y, lid2shortname, filename):
    """
    Save the confusion matrix given the true labels and the predicted labels.

    Parameters
    ----------
    true_y : np.array
        True y's (true labels)
    pred_y : np.array
        Predicted y's (predicted labels)
    lid2shortname : dict[int, str]
        Short names for each label index
    filename : str
        Path to the file where the confusion matrix will be saved.

    Returns
    -------
    np.array[int]
        Confusion matrix of counts, with shape (n_labels, n_labels).
"""
conf = confusion_matrix(true_y, pred_y)
save_sq_mat_with_labels(conf, lid2shortname, filename)
return conf
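

if __name__ == "__main__":
    # Minimal smoke test added for illustration; it exercises only the helpers
    # that need numpy alone, with tiny synthetic labels rather than real data.
    toy_true = np.array([0, 1, 1, 0, 1])
    toy_pred = np.array([0, 1, 0, 0, 1])
    toy_scores = np.array([0.9, 0.8, 0.3, 0.2, 0.7])
    print("accuracy    : %.1f%%" % get_accuracy(toy_true, toy_pred))    # 80.0%
    print("R-precision : %.2f" % R_precision(toy_true, toy_scores))     # 0.67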