-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmetamorphic_eval.py
87 lines (70 loc) · 2.96 KB
/
metamorphic_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from eval import evaluate_one
import argparse, json, copy, time
from multiprocessing import Pool
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-o', '--output', dest='output', type=str)
parser.add_argument('-t', '--table', dest='table', type=str)
args = parser.parse_args()
tables = args.table
with open(tables) as f:
j = json.load(f)
tables = {db["db_id"]:db for db in j}
output = args.output
with open(output) as f:
data = [line.strip().split("\t") for line in f.readlines() if line.strip() != ""]
print(len([d for d in data if "orig" in d[0]]))
def evaluate(data_entry):
return (data_entry[0], evaluate_one(data_entry[1], data_entry[2], tables[data_entry[3]]), data_entry[4])
with Pool(processes=24) as pool:
results = pool.map(evaluate, data)
testcase_dict = {}
for i, rlt in enumerate(results):
testcase, score, _ = rlt
if "orig" in testcase:
testcase_dict[testcase.split("__")[0]] = (i, score[0], score[2])
hardness_stat = {'easy':[0,0], 'medium':[0,0], 'hard':[0,0], 'extra':[0,0], 'all':[0,0], "unknown": [0,0]}
method_stat = {}
method_acc_stat = {}
ood_stat = {"orig": copy.deepcopy(hardness_stat)}
for i, rlt in enumerate(results):
testcase, score, method = rlt
if "orig" not in testcase:
idx = testcase.split("__")[0]
if idx in testcase_dict:
orig_idx, orig_score, _ = testcase_dict[idx]
hardness_stat[_][1] += 1
hardness_stat["all"][1] += 1
if method not in method_stat:
method_stat[method] = [0,1]
method_acc_stat[method] = [0,1]
else:
method_stat[method][1] += 1
method_acc_stat[method] = [0,1]
if score[0] == 1:
method_acc_stat[method][0] += 1
if score[0] != orig_score:
hardness_stat[_][0] += 1
hardness_stat["all"][0] += 1
method_stat[method][0] += 1
for k in hardness_stat:
if hardness_stat[k][1] != 0:
print(f"{k}: {hardness_stat[k]}, {hardness_stat[k][0]/hardness_stat[k][1]}")
print("cons")
for k in method_stat:
if method_stat[k][1] != 0:
print(f"{k}: {method_stat[k]}, {method_stat[k][0]/method_stat[k][1]}")
for rlt in results:
testcase, score, method = rlt
if "orig" in testcase:
ood_stat["orig"][score[2]][1] += 1
ood_stat["orig"]["all"][1] += 1
if score[0] == 1:
ood_stat["orig"][score[2]][0] += 1
ood_stat["orig"]["all"][0] += 1
print()
a1 = 0
for k in ood_stat:
for m in ood_stat[k]:
if ood_stat[k][m][1] != 0:
print(f"{k}-{m}: {ood_stat[k][m]}, {ood_stat[k][m][0]/ood_stat[k][m][1]}")