Skip to content

Commit

Permalink
feat(): change feature type to cqt spectrogram and fix some problems …
Browse files Browse the repository at this point in the history
…in min-tDCF
  • Loading branch information
pedramabdzadeh committed Oct 1, 2021
1 parent 26bc6a9 commit b280006
Show file tree
Hide file tree
Showing 6 changed files with 323 additions and 266 deletions.
106 changes: 57 additions & 49 deletions evaluate_tDCF_asvspoof19.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,45 @@
import os
import sys
import numpy as np
import evaluation_metrics as em
import eval_metrics as em
import matplotlib.pyplot as plt

def compute_eer_and_tdcf(cm_score_file, path_to_database):
asv_score_file = os.path.join(path_to_database, 'ASVspoof2019.scores.txt')
def evaluate_tDCF_asvspoof19(cm_score_file, asv_score_file, legacy):

# Fix tandem detection cost function (t-DCF) parameters
Pspoof = 0.05
cost_model = {
'Pspoof': Pspoof, # Prior probability of a spoofing attack
'Ptar': (1 - Pspoof) * 0.99, # Prior probability of target speaker
'Pnon': (1 - Pspoof) * 0.01, # Prior probability of nontarget speaker
'Cmiss_asv': 1, # Cost of ASV system falsely rejecting target speaker
'Cfa_asv': 10, # Cost of ASV system falsely accepting nontarget speaker
'Cmiss_cm': 1, # Cost of CM system falsely rejecting target speaker
'Cfa_cm': 10, # Cost of CM system falsely accepting spoof
}
if legacy:
Pspoof = 0.05
cost_model = {
'Pspoof': Pspoof, # Prior probability of a spoofing attack
'Ptar': (1 - Pspoof) * 0.99, # Prior probability of target speaker
'Pnon': (1 - Pspoof) * 0.01, # Prior probability of nontarget speaker
'Cmiss_asv': 1, # Cost of ASV system falsely rejecting target speaker
'Cfa_asv': 10, # Cost of ASV system falsely accepting nontarget speaker
'Cmiss_cm': 1, # Cost of CM system falsely rejecting target speaker
'Cfa_cm': 10, # Cost of CM system falsely accepting spoof
}
else:
Pspoof = 0.05
cost_model = {
'Pspoof': Pspoof, # Prior probability of a spoofing attack
'Ptar': (1 - Pspoof) * 0.99, # Prior probability of target speaker
'Pnon': (1 - Pspoof) * 0.01, # Prior probability of nontarget speaker
'Cmiss': 1, # Cost of tandem system falsely rejecting target speaker
'Cfa': 10, # Cost of tandem system falsely accepting nontarget speaker
'Cfa_spoof': 10, # Cost of tandem system falsely accepting spoof
}

# Load organizers' ASV scores
asv_data = np.genfromtxt(asv_score_file, dtype=str)
asv_sources = asv_data[:, 0]
asv_keys = asv_data[:, 1]
asv_scores = asv_data[:, 2].astype(np.float)
asv_keys = asv_data[:, 4]
asv_scores = asv_data[:, 5].astype(np.float)

# Load CM scores
cm_data = np.genfromtxt(cm_score_file, dtype=str)
cm_utt_id = cm_data[:, 0]
cm_sources = cm_data[:, 1]
cm_keys = cm_data[:, 2]
cm_scores = cm_data[:, 3].astype(np.float)

other_cm_scores = -cm_scores
cm_utt_id = cm_data[:, 1]
cm_sources = cm_data[:, 0]
cm_keys = cm_data[:, 4]
cm_scores = cm_data[:, 5].astype(np.float)

# Extract target, nontarget, and spoof scores from the ASV scores
tar_asv = asv_scores[asv_keys == 'target']
Expand All @@ -46,35 +54,40 @@ def compute_eer_and_tdcf(cm_score_file, path_to_database):
eer_asv, asv_threshold = em.compute_eer(tar_asv, non_asv)
eer_cm = em.compute_eer(bona_cm, spoof_cm)[0]

other_eer_cm = em.compute_eer(other_cm_scores[cm_keys == 'bonafide'], other_cm_scores[cm_keys == 'spoof'])[0]

[Pfa_asv, Pmiss_asv, Pmiss_spoof_asv] = em.obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold)
[Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv] = em.obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold)

if eer_cm < other_eer_cm:
# Compute t-DCF
tDCF_curve, CM_thresholds = em.compute_tDCF(bona_cm, spoof_cm, Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, True)

# Minimum t-DCF
min_tDCF_index = np.argmin(tDCF_curve)
min_tDCF = tDCF_curve[min_tDCF_index]

# Compute t-DCF
if legacy:
tDCF_curve, CM_thresholds = em.compute_tDCF_legacy(bona_cm, spoof_cm, Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, True)
else:
tDCF_curve, CM_thresholds = em.compute_tDCF(other_cm_scores[cm_keys == 'bonafide'], other_cm_scores[cm_keys == 'spoof'],
Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, True)
tDCF_curve, CM_thresholds = em.compute_tDCF(bona_cm, spoof_cm, Pfa_asv, Pmiss_asv, Pfa_spoof_asv, cost_model, True)

# Minimum t-DCF
min_tDCF_index = np.argmin(tDCF_curve)
min_tDCF = tDCF_curve[min_tDCF_index]
# Minimum t-DCF
min_tDCF_index = np.argmin(tDCF_curve)
min_tDCF = tDCF_curve[min_tDCF_index]
min_tDCF_threshold = CM_thresholds[min_tDCF_index];

# compute DET of CM and get Pmiss and Pfa for the selected threshold t_CM
Pmiss_cm, Pfa_cm, CM_thresholds = em.compute_det_curve(bona_cm, spoof_cm)
Pmiss_t_CM = Pmiss_cm[CM_thresholds == min_tDCF_threshold]
Pfa_t_CM = Pfa_cm[CM_thresholds == min_tDCF_threshold]

# print('ASV SYSTEM')
# print(' EER = {:8.5f} % (Equal error rate (target vs. nontarget discrimination)'.format(eer_asv * 100))
# print(' Pfa = {:8.5f} % (False acceptance rate of nontargets)'.format(Pfa_asv * 100))
# print(' Pmiss = {:8.5f} % (False rejection rate of targets)'.format(Pmiss_asv * 100))
# print(' 1-Pmiss,spoof = {:8.5f} % (Spoof false acceptance rate)'.format((1 - Pmiss_spoof_asv) * 100))

print('ASV SYSTEM')
print(' EER = {:8.5f} % (Equal error rate (target vs. nontarget discrimination)'.format(eer_asv * 100))
print(' Pfa = {:8.5f} % (False acceptance rate of nontargets)'.format(Pfa_asv * 100))
print(' Pmiss = {:8.5f} % (False rejection rate of targets)'.format(Pmiss_asv * 100))
if legacy:
print(' 1-Pmiss,spoof = {:8.5f} % (Spoof false acceptance rate)'.format((1 - Pmiss_spoof_asv) * 100))
else:
print(' Pfa,spoof = {:8.5f} % (Spoof false acceptance rate)'.format((1 - Pmiss_spoof_asv) * 100))

print('\nCM SYSTEM')
print(' EER = {:8.5f} % (Equal error rate for countermeasure)'.format(min(eer_cm, other_eer_cm) * 100))
print(' EER = {:8.5f} % (Equal error rate for countermeasure)'.format(eer_cm * 100))
print(' Pfa(t_CM_min_tDCF) = {:8.5f} % (False acceptance rate of spoofs)'.format(Pfa_t_CM[0] * 100))
print(' Pmiss(t_CM_min_tDCF) = {:8.5f} % (Miss (false rejection) rate of bonafide)'.format(Pmiss_t_CM[0] * 100))

print('\nTANDEM')
print(' min-tDCF = {:8.5f}'.format(min_tDCF))
Expand All @@ -97,25 +110,20 @@ def compute_eer_and_tdcf(cm_score_file, path_to_database):
plt.hist(spoof_cm, histtype='step', density=True, bins=50, label='Spoof')
plt.legend()
plt.xlabel('CM score')
# plt.ylabel('Density')
#plt.ylabel('Density')
plt.title('CM score histogram')
plt.savefig(cm_score_file[:-4]+'1.png')


# Plot t-DCF as function of the CM threshold.
plt.figure()
plt.plot(CM_thresholds, tDCF_curve)
plt.plot(CM_thresholds[min_tDCF_index], min_tDCF, 'o', markersize=10, mfc='none', mew=2)
plt.xlabel('CM threshold index (operating point)')
plt.ylabel('Norm t-DCF')
plt.ylabel('Norm t-DCF');
plt.title('Normalized tandem t-DCF')
plt.plot([np.min(CM_thresholds), np.max(CM_thresholds)], [1, 1], '--', color='black')
plt.legend(('t-DCF', 'min t-DCF ({:.5f})'.format(min_tDCF), 'Arbitrarily bad CM (Norm t-DCF=1)'))
plt.xlim([np.min(CM_thresholds), np.max(CM_thresholds)])
plt.ylim([0, 1.5])
plt.savefig(cm_score_file[:-4]+'2.png')

plt.show()

return min(eer_cm, other_eer_cm), min_tDCF

174 changes: 161 additions & 13 deletions evaluation_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ def obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold):
# Rate of rejecting spoofs in ASV
if spoof_asv.size == 0:
Pmiss_spoof_asv = None
Pfa_spoof_asv = None
else:
Pmiss_spoof_asv = np.sum(spoof_asv < asv_threshold) / spoof_asv.size
Pfa_spoof_asv = np.sum(spoof_asv >= asv_threshold) / spoof_asv.size

return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv
return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv


def compute_det_curve(target_scores, nontarget_scores):
Expand Down Expand Up @@ -46,7 +48,154 @@ def compute_eer(target_scores, nontarget_scores):
return eer, thresholds[min_index]


def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, print_cost):
def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pfa_spoof_asv, cost_model, print_cost):
    """
    Compute the normalized Tandem Detection Cost Function (t-DCF) [1] for a
    fixed ASV system.

    In brief, t-DCF returns a detection cost of a cascaded system of this form,

      Speech waveform -> [CM] -> [ASV] -> decision

    where CM stands for countermeasure and ASV for automatic speaker
    verification. The CM is therefore used as a 'gate' to decide whether or
    not the input speech sample should be passed onwards to the ASV system.
    Generally, both CM and ASV can make detection errors. Not all those errors
    are necessarily equally costly, and not all types of users are necessarily
    equally likely. The tandem t-DCF gives a principled way to compare
    different spoofing countermeasures under a detection cost function
    framework that takes that information into account.

    INPUTS:

      bonafide_score_cm   A vector of POSITIVE CLASS (bona fide or human)
                          detection scores obtained by executing a spoofing
                          countermeasure (CM) on some positive evaluation
                          trials. Each trial represents a bona fide case.
      spoof_score_cm      A vector of NEGATIVE CLASS (spoofing attack)
                          detection scores obtained by executing a spoofing
                          CM on some negative evaluation trials.
      Pfa_asv             False alarm (false acceptance) rate of the ASV
                          system that is evaluated in tandem with the CM.
                          Assumed to be in fractions, not percentages.
      Pmiss_asv           Miss (false rejection) rate of the ASV system that
                          is evaluated in tandem with the spoofing CM.
                          Assumed to be in fractions, not percentages.
      Pfa_spoof_asv       False acceptance rate of spoof samples of the ASV
                          system that is evaluated in tandem with the
                          spoofing CM. That is, the fraction of spoof samples
                          that were accepted by the ASV system. Must not be
                          None.
      cost_model          A dict that contains the parameters of t-DCF,
                          with the following keys.

                          Ptar        Prior probability of target speaker.
                          Pnon        Prior probability of nontarget speaker (zero-effort impostor)
                          Pspoof      Prior probability of spoofing attack.
                          Cmiss       Cost of tandem system falsely rejecting target speaker.
                          Cfa         Cost of tandem system falsely accepting nontarget speaker.
                          Cfa_spoof   Cost of tandem system falsely accepting spoof.

      print_cost          Print a summary of the cost parameters and the
                          implied t-DCF cost function?

    OUTPUTS:

      tDCF_norm           Normalized t-DCF curve across the different CM
                          system operating points; see [2] for more details.
                          Normalized t-DCF > 1 indicates a useless
                          countermeasure (as the tandem system would do
                          better without it). min(tDCF_norm) will be the
                          minimum t-DCF used in ASVspoof 2019 [2].
      CM_thresholds       Vector of same size as tDCF_norm corresponding to
                          the CM threshold (operating point).

    RAISES:

      SystemExit          (via sys.exit) when the priors are negative or do
                          not sum to one, when Pfa_spoof_asv is None, when
                          the scores contain nan/inf or fewer than three
                          distinct values, or when a t-DCF weight is negative.

    NOTE:
    o     In relative terms, higher detection scores values are assumed to
          indicate stronger support for the bona fide hypothesis.
    o     You should provide real-valued soft scores, NOT hard decisions. The
          recommendation is that the scores are log-likelihood ratios (LLRs)
          from a bonafide-vs-spoof hypothesis based on some statistical model.
          This, however, is NOT required. The scores can have arbitrary range
          and scaling.
    o     Pfa_asv, Pmiss_asv, Pfa_spoof_asv are in fractions, not percentages.

    References:

      [1] T. Kinnunen, H. Delgado, N. Evans,K.-A. Lee, V. Vestman,
          A. Nautsch, M. Todisco, X. Wang, M. Sahidullah, J. Yamagishi,
          and D.-A. Reynolds, "Tandem Assessment of Spoofing Countermeasures
          and Automatic Speaker Verification: Fundamentals," IEEE/ACM Transaction on
          Audio, Speech and Language Processing (TASLP).

      [2] ASVspoof 2019 challenge evaluation plan
          https://www.asvspoof.org/asvspoof2019/asvspoof2019_evaluation_plan.pdf
    """

    # Sanity check of cost parameters. All three tandem costs are checked
    # (the previous version tested Cfa/Cmiss twice and never Cfa_spoof).
    if any(cost_model[key] < 0 for key in ('Cfa', 'Cmiss', 'Cfa_spoof')):
        print('WARNING: Usually the cost values should be positive!')

    if cost_model['Ptar'] < 0 or cost_model['Pnon'] < 0 or cost_model['Pspoof'] < 0 or \
            np.abs(cost_model['Ptar'] + cost_model['Pnon'] + cost_model['Pspoof'] - 1) > 1e-10:
        sys.exit('ERROR: Your prior probabilities should be positive and sum up to one.')

    # Unless we evaluate worst-case model, we need to have some spoof tests against asv
    if Pfa_spoof_asv is None:
        sys.exit('ERROR: you should provide false alarm rate of spoof tests against your ASV system.')

    # Sanity check of scores
    combined_scores = np.concatenate((bonafide_score_cm, spoof_score_cm))
    if np.isnan(combined_scores).any() or np.isinf(combined_scores).any():
        sys.exit('ERROR: Your scores contain nan or inf.')

    # Sanity check that inputs are scores and not decisions
    n_uniq = np.unique(combined_scores).size
    if n_uniq < 3:
        sys.exit('ERROR: You should provide soft CM scores - not binary decisions')

    # Obtain miss and false alarm rates of CM
    Pmiss_cm, Pfa_cm, CM_thresholds = compute_det_curve(bonafide_score_cm, spoof_score_cm)

    # Constants - see ASVspoof 2019 evaluation plan
    C0 = cost_model['Ptar'] * cost_model['Cmiss'] * Pmiss_asv + cost_model['Pnon']*cost_model['Cfa']*Pfa_asv
    C1 = cost_model['Ptar'] * cost_model['Cmiss'] - C0
    C2 = cost_model['Pspoof'] * cost_model['Cfa_spoof'] * Pfa_spoof_asv

    # Sanity check of the weights
    if C0 < 0 or C1 < 0 or C2 < 0:
        sys.exit('You should never see this error but I cannot evalute tDCF with negative weights - please check whether your ASV error rates are correctly computed?')

    # Obtain t-DCF curve for all thresholds
    tDCF = C0 + C1 * Pmiss_cm + C2 * Pfa_cm

    # Obtain default t-DCF (cost of the better of the two trivial CMs:
    # accept everything or reject everything)
    tDCF_default = C0 + np.minimum(C1, C2)

    # Normalized t-DCF
    tDCF_norm = tDCF / tDCF_default

    # Everything should be fine if reaching here.
    if print_cost:

        print('t-DCF evaluation from [Nbona={}, Nspoof={}] trials\n'.format(bonafide_score_cm.size, spoof_score_cm.size))
        print('t-DCF MODEL')
        print(' Ptar = {:8.5f} (Prior probability of target user)'.format(cost_model['Ptar']))
        print(' Pnon = {:8.5f} (Prior probability of nontarget user)'.format(cost_model['Pnon']))
        print(' Pspoof = {:8.5f} (Prior probability of spoofing attack)'.format(cost_model['Pspoof']))
        print(' Cfa = {:8.5f} (Cost of tandem system falsely accepting a nontarget)'.format(cost_model['Cfa']))
        print(' Cmiss = {:8.5f} (Cost of tandem system falsely rejecting target speaker)'.format(cost_model['Cmiss']))
        print(' Cfa_spoof = {:8.5f} (Cost of tandem sysmte falsely accepting spoof)'.format(cost_model['Cfa_spoof']))
        print('\n Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), t_CM=CM threshold)')
        print(' tDCF_norm(t_CM) = {:8.5f} + {:8.5f} x Pmiss_cm(t_CM) + {:8.5f} x Pfa_cm(t_CM)\n'.format(C0/tDCF_default, C1/tDCF_default, C2/tDCF_default))
        # Report the actual first term instead of a value hard-coded for one
        # specific ASV system.
        print(' * The optimum value is given by the first term ({:.5f}). This is the normalized t-DCF obtained with an error-free CM system.'.format(C0/tDCF_default))
        print(' * The minimum normalized cost (minimum over all possible thresholds) is always <= 1.00.')
        print('')

    return tDCF_norm, CM_thresholds

def compute_tDCF_legacy(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, print_cost):
"""
Compute Tandem Detection Cost Function (t-DCF) [1] for a fixed ASV system.
In brief, t-DCF returns a detection cost of a cascaded system of this form,
Expand Down Expand Up @@ -126,7 +275,7 @@ def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pmiss_sp
France, June 2018 (https://www.isca-speech.org/archive/Odyssey_2018/pdfs/68.pdf)
[2] ASVspoof 2019 challenge evaluation plan
TODO: <add link
https://www.asvspoof.org/asvspoof2019/asvspoof2019_evaluation_plan.pdf
"""


Expand Down Expand Up @@ -175,20 +324,19 @@ def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pmiss_sp
if print_cost:

print('t-DCF evaluation from [Nbona={}, Nspoof={}] trials\n'.format(bonafide_score_cm.size, spoof_score_cm.size))
# print('t-DCF MODEL')
# print(' Ptar = {:8.5f} (Prior probability of target user)'.format(cost_model['Ptar']))
# print(' Pnon = {:8.5f} (Prior probability of nontarget user)'.format(cost_model['Pnon']))
# print(' Pspoof = {:8.5f} (Prior probability of spoofing attack)'.format(cost_model['Pspoof']))
# print(' Cfa_asv = {:8.5f} (Cost of ASV falsely accepting a nontarget)'.format(cost_model['Cfa_asv']))
# print(' Cmiss_asv = {:8.5f} (Cost of ASV falsely rejecting target speaker)'.format(cost_model['Cmiss_asv']))
# print(' Cfa_cm = {:8.5f} (Cost of CM falsely passing a spoof to ASV system)'.format(cost_model['Cfa_cm']))
# print(' Cmiss_cm = {:8.5f} (Cost of CM falsely blocking target utterance which never reaches ASV)'.format(cost_model['Cmiss_cm']))
# print('\n Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), s=CM threshold)')
print('t-DCF MODEL')
print(' Ptar = {:8.5f} (Prior probability of target user)'.format(cost_model['Ptar']))
print(' Pnon = {:8.5f} (Prior probability of nontarget user)'.format(cost_model['Pnon']))
print(' Pspoof = {:8.5f} (Prior probability of spoofing attack)'.format(cost_model['Pspoof']))
print(' Cfa_asv = {:8.5f} (Cost of ASV falsely accepting a nontarget)'.format(cost_model['Cfa_asv']))
print(' Cmiss_asv = {:8.5f} (Cost of ASV falsely rejecting target speaker)'.format(cost_model['Cmiss_asv']))
print(' Cfa_cm = {:8.5f} (Cost of CM falsely passing a spoof to ASV system)'.format(cost_model['Cfa_cm']))
print(' Cmiss_cm = {:8.5f} (Cost of CM falsely blocking target utterance which never reaches ASV)'.format(cost_model['Cmiss_cm']))
print('\n Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), s=CM threshold)')

if C2 == np.minimum(C1, C2):
print(' tDCF_norm(s) = {:8.5f} x Pmiss_cm(s) + Pfa_cm(s)\n'.format(C1 / C2))
else:
print(' tDCF_norm(s) = Pmiss_cm(s) + {:8.5f} x Pfa_cm(s)\n'.format(C2 / C1))

return tDCF_norm, CM_thresholds

Loading

0 comments on commit b280006

Please sign in to comment.