-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathevaluate_2021_LA.py
130 lines (98 loc) · 4.9 KB
/
evaluate_2021_LA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python
"""
Script to compute pooled EER and min tDCF for ASVspoof2021 LA.
Usage:
$: python PATH_TO_SCORE_FILE PATH_TO_GROUDTRUTH_DIR phase
-PATH_TO_SCORE_FILE: path to the score file
-PATH_TO_GROUNDTRUTH_DIR: path to the directory that has tje CM protocol and ASV score.
Please follow README, download the key files, and use ./keys
-phase: either progress, eval, or hidden_track
Example:
$: python evaluate.py score.txt ./keys eval
"""
import sys, os.path
import numpy as np
import pandas
import eval_metric_LA as em
from glob import glob
if len(sys.argv) != 4:
print("CHECK: invalid input arguments. Please read the instruction below:")
print(__doc__)
exit(1)
submit_file = sys.argv[1]
truth_dir = sys.argv[2]
phase = sys.argv[3]
asv_key_file = os.path.join(truth_dir, 'ASV/trial_metadata.txt')
asv_scr_file = os.path.join(truth_dir, 'ASV/ASVTorch_Kaldi/score.txt')
cm_key_file = os.path.join(truth_dir, 'CM/trial_metadata.txt')
Pspoof = 0.05
cost_model = {
'Pspoof': Pspoof, # Prior probability of a spoofing attack
'Ptar': (1 - Pspoof) * 0.99, # Prior probability of target speaker
'Pnon': (1 - Pspoof) * 0.01, # Prior probability of nontarget speaker
'Cmiss': 1, # Cost of tandem system falsely rejecting target speaker
'Cfa': 10, # Cost of tandem system falsely accepting nontarget speaker
'Cfa_spoof': 10, # Cost of tandem system falsely accepting spoof
}
def load_asv_metrics():
# Load organizers' ASV scores
asv_key_data = pandas.read_csv(asv_key_file, sep=' ', header=None)
asv_scr_data = pandas.read_csv(asv_scr_file, sep=' ', header=None)[asv_key_data[7] == phase]
idx_tar = asv_key_data[asv_key_data[7] == phase][5] == 'target'
idx_non = asv_key_data[asv_key_data[7] == phase][5] == 'nontarget'
idx_spoof = asv_key_data[asv_key_data[7] == phase][5] == 'spoof'
# Extract target, nontarget, and spoof scores from the ASV scores
tar_asv = asv_scr_data[2][idx_tar]
non_asv = asv_scr_data[2][idx_non]
spoof_asv = asv_scr_data[2][idx_spoof]
eer_asv, asv_threshold = em.compute_eer(tar_asv, non_asv)
[Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv] = em.obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold)
return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv
def performance(cm_scores, Pfa_asv, Pmiss_asv, Pfa_spoof_asv, invert=False):
bona_cm = cm_scores[cm_scores[5]=='bonafide']['1_x'].values
spoof_cm = cm_scores[cm_scores[5]=='spoof']['1_x'].values
if invert==False:
eer_cm = em.compute_eer(bona_cm, spoof_cm)[0]
else:
eer_cm = em.compute_eer(-bona_cm, -spoof_cm)[0]
if invert==False:
tDCF_curve, _ = em.compute_tDCF(bona_cm, spoof_cm, Pfa_asv, Pmiss_asv, Pfa_spoof_asv, cost_model, False)
else:
tDCF_curve, _ = em.compute_tDCF(-bona_cm, -spoof_cm, Pfa_asv, Pmiss_asv, Pfa_spoof_asv, cost_model, False)
min_tDCF_index = np.argmin(tDCF_curve)
min_tDCF = tDCF_curve[min_tDCF_index]
return min_tDCF, eer_cm
def eval_to_score_file(score_file, cm_key_file):
Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv = load_asv_metrics()
cm_data = pandas.read_csv(cm_key_file, sep=' ', header=None)
submission_scores = pandas.read_csv(score_file, sep=' ', header=None, skipinitialspace=True)
if len(submission_scores) != len(cm_data):
print('CHECK: submission has %d of %d expected trials.' % (len(submission_scores), len(cm_data)))
exit(1)
# check here for progress vs eval set
cm_scores = submission_scores.merge(cm_data[cm_data[7] == phase], left_on=0, right_on=1, how='inner')
min_tDCF, eer_cm = performance(cm_scores, Pfa_asv, Pmiss_asv, Pfa_spoof_asv)
out_data = "min_tDCF: %.4f\n" % min_tDCF
out_data += "eer: %.2f\n" % (100*eer_cm)
print(out_data, end="")
# just in case that the submitted file reverses the sign of positive and negative scores
min_tDCF2, eer_cm2 = performance(cm_scores, Pfa_asv, Pmiss_asv, Pfa_spoof_asv, invert=True)
if min_tDCF2 < min_tDCF:
print(
'CHECK: we negated your scores and achieved a lower min t-DCF. Before: %.3f - Negated: %.3f - your class labels are swapped during training... this will result in poor challenge ranking' % (
min_tDCF, min_tDCF2))
if min_tDCF == min_tDCF2:
print(
'WARNING: your classifier might not work correctly, we checked if negating your scores gives different min t-DCF - it does not. Are all values the same?')
return min_tDCF
if __name__ == "__main__":
if not os.path.isfile(submit_file):
print("%s doesn't exist" % (submit_file))
exit(1)
if not os.path.isdir(truth_dir):
print("%s doesn't exist" % (truth_dir))
exit(1)
if phase != 'progress' and phase != 'eval' and phase != 'hidden_track':
print("phase must be either progress, eval, or hidden_track")
exit(1)
_ = eval_to_score_file(submit_file, cm_key_file)