feat(): change feature type to cqt spectrogram and fix some problems …

…in min-tDCF
brdhunga · Oct 1, 2021 · b280006 · b280006
1 parent 26bc6a9
commit b280006
Show file tree

Hide file tree

Showing 6 changed files with 323 additions and 266 deletions.
diff --git a/evaluate_tDCF_asvspoof19.py b/evaluate_tDCF_asvspoof19.py
@@ -1,37 +1,45 @@
-import os
+import sys
 import numpy as np
-import evaluation_metrics as em
+import eval_metrics as em
 import matplotlib.pyplot as plt
 
-def compute_eer_and_tdcf(cm_score_file, path_to_database):
-    asv_score_file = os.path.join(path_to_database, 'ASVspoof2019.scores.txt')
+def evaluate_tDCF_asvspoof19(cm_score_file, asv_score_file, legacy):
 
     # Fix tandem detection cost function (t-DCF) parameters
-    Pspoof = 0.05
-    cost_model = {
-        'Pspoof': Pspoof,  # Prior probability of a spoofing attack
-        'Ptar': (1 - Pspoof) * 0.99,  # Prior probability of target speaker
-        'Pnon': (1 - Pspoof) * 0.01,  # Prior probability of nontarget speaker
-        'Cmiss_asv': 1,  # Cost of ASV system falsely rejecting target speaker
-        'Cfa_asv': 10,  # Cost of ASV system falsely accepting nontarget speaker
-        'Cmiss_cm': 1,  # Cost of CM system falsely rejecting target speaker
-        'Cfa_cm': 10,  # Cost of CM system falsely accepting spoof
-    }
+    if legacy:
+        Pspoof = 0.05
+        cost_model = {
+            'Pspoof': Pspoof,  # Prior probability of a spoofing attack
+            'Ptar': (1 - Pspoof) * 0.99,  # Prior probability of target speaker
+            'Pnon': (1 - Pspoof) * 0.01,  # Prior probability of nontarget speaker
+            'Cmiss_asv': 1,  # Cost of ASV system falsely rejecting target speaker
+            'Cfa_asv': 10,  # Cost of ASV system falsely accepting nontarget speaker
+            'Cmiss_cm': 1,  # Cost of CM system falsely rejecting target speaker
+            'Cfa_cm': 10,  # Cost of CM system falsely accepting spoof
+        }
+    else:
+        Pspoof = 0.05
+        cost_model = {
+            'Pspoof': Pspoof,  # Prior probability of a spoofing attack
+            'Ptar': (1 - Pspoof) * 0.99,  # Prior probability of target speaker
+            'Pnon': (1 - Pspoof) * 0.01,  # Prior probability of nontarget speaker
+            'Cmiss': 1,  # Cost of tandem system falsely rejecting target speaker
+            'Cfa': 10,  # Cost of tandem system falsely accepting nontarget speaker
+            'Cfa_spoof': 10,  # Cost of tandem system falsely accepting spoof
+        }
 
     # Load organizers' ASV scores
     asv_data = np.genfromtxt(asv_score_file, dtype=str)
     asv_sources = asv_data[:, 0]
-    asv_keys = asv_data[:, 1]
-    asv_scores = asv_data[:, 2].astype(np.float)
+    asv_keys = asv_data[:, 4]
+    asv_scores = asv_data[:, 5].astype(np.float)
 
     # Load CM scores
     cm_data = np.genfromtxt(cm_score_file, dtype=str)
-    cm_utt_id = cm_data[:, 0]
-    cm_sources = cm_data[:, 1]
-    cm_keys = cm_data[:, 2]
-    cm_scores = cm_data[:, 3].astype(np.float)
-
-    other_cm_scores = -cm_scores
+    cm_utt_id = cm_data[:, 1]
+    cm_sources = cm_data[:, 0]
+    cm_keys = cm_data[:, 4]
+    cm_scores = cm_data[:, 5].astype(np.float)
 
     # Extract target, nontarget, and spoof scores from the ASV scores
     tar_asv = asv_scores[asv_keys == 'target']
@@ -46,35 +54,40 @@ def compute_eer_and_tdcf(cm_score_file, path_to_database):
     eer_asv, asv_threshold = em.compute_eer(tar_asv, non_asv)
     eer_cm = em.compute_eer(bona_cm, spoof_cm)[0]
 
-    other_eer_cm = em.compute_eer(other_cm_scores[cm_keys == 'bonafide'], other_cm_scores[cm_keys == 'spoof'])[0]
 
-    [Pfa_asv, Pmiss_asv, Pmiss_spoof_asv] = em.obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold)
+    [Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv] = em.obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold)
 
-    if eer_cm < other_eer_cm:
-        # Compute t-DCF
-        tDCF_curve, CM_thresholds = em.compute_tDCF(bona_cm, spoof_cm, Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, True)
-
-        # Minimum t-DCF
-        min_tDCF_index = np.argmin(tDCF_curve)
-        min_tDCF = tDCF_curve[min_tDCF_index]
 
+    # Compute t-DCF
+    if legacy:
+        tDCF_curve, CM_thresholds = em.compute_tDCF_legacy(bona_cm, spoof_cm, Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, True)
     else:
-        tDCF_curve, CM_thresholds = em.compute_tDCF(other_cm_scores[cm_keys == 'bonafide'], other_cm_scores[cm_keys == 'spoof'],
-                                                    Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, True)
+        tDCF_curve, CM_thresholds = em.compute_tDCF(bona_cm, spoof_cm, Pfa_asv, Pmiss_asv, Pfa_spoof_asv, cost_model, True)
 
-        # Minimum t-DCF
-        min_tDCF_index = np.argmin(tDCF_curve)
-        min_tDCF = tDCF_curve[min_tDCF_index]
+    # Minimum t-DCF
+    min_tDCF_index = np.argmin(tDCF_curve)
+    min_tDCF = tDCF_curve[min_tDCF_index]
+    min_tDCF_threshold = CM_thresholds[min_tDCF_index];
 
+    # compute DET of CM and get Pmiss and Pfa for the selected threshold t_CM
+    Pmiss_cm, Pfa_cm, CM_thresholds = em.compute_det_curve(bona_cm, spoof_cm)
+    Pmiss_t_CM = Pmiss_cm[CM_thresholds == min_tDCF_threshold]
+    Pfa_t_CM = Pfa_cm[CM_thresholds == min_tDCF_threshold]
 
-    # print('ASV SYSTEM')
-    # print('   EER            = {:8.5f} % (Equal error rate (target vs. nontarget discrimination)'.format(eer_asv * 100))
-    # print('   Pfa            = {:8.5f} % (False acceptance rate of nontargets)'.format(Pfa_asv * 100))
-    # print('   Pmiss          = {:8.5f} % (False rejection rate of targets)'.format(Pmiss_asv * 100))
-    # print('   1-Pmiss,spoof  = {:8.5f} % (Spoof false acceptance rate)'.format((1 - Pmiss_spoof_asv) * 100))
+
+    print('ASV SYSTEM')
+    print('   EER            = {:8.5f} % (Equal error rate (target vs. nontarget discrimination)'.format(eer_asv * 100))
+    print('   Pfa            = {:8.5f} % (False acceptance rate of nontargets)'.format(Pfa_asv * 100))
+    print('   Pmiss          = {:8.5f} % (False rejection rate of targets)'.format(Pmiss_asv * 100))
+    if legacy:
+        print('   1-Pmiss,spoof  = {:8.5f} % (Spoof false acceptance rate)'.format((1 - Pmiss_spoof_asv) * 100))
+    else:
+        print('   Pfa,spoof  = {:8.5f} % (Spoof false acceptance rate)'.format((1 - Pmiss_spoof_asv) * 100))
 
     print('\nCM SYSTEM')
-    print('   EER            = {:8.5f} % (Equal error rate for countermeasure)'.format(min(eer_cm, other_eer_cm) * 100))
+    print('   EER                  = {:8.5f} % (Equal error rate for countermeasure)'.format(eer_cm * 100))
+    print('   Pfa(t_CM_min_tDCF)   = {:8.5f} % (False acceptance rate of spoofs)'.format(Pfa_t_CM[0] * 100))
+    print('   Pmiss(t_CM_min_tDCF) = {:8.5f} % (Miss (false rejection) rate of bonafide)'.format(Pmiss_t_CM[0] * 100))
 
     print('\nTANDEM')
     print('   min-tDCF       = {:8.5f}'.format(min_tDCF))
@@ -97,25 +110,20 @@ def compute_eer_and_tdcf(cm_score_file, path_to_database):
     plt.hist(spoof_cm, histtype='step', density=True, bins=50, label='Spoof')
     plt.legend()
     plt.xlabel('CM score')
-    # plt.ylabel('Density')
+    #plt.ylabel('Density')
     plt.title('CM score histogram')
-    plt.savefig(cm_score_file[:-4]+'1.png')
 
 
     # Plot t-DCF as function of the CM threshold.
     plt.figure()
     plt.plot(CM_thresholds, tDCF_curve)
     plt.plot(CM_thresholds[min_tDCF_index], min_tDCF, 'o', markersize=10, mfc='none', mew=2)
     plt.xlabel('CM threshold index (operating point)')
-    plt.ylabel('Norm t-DCF')
+    plt.ylabel('Norm t-DCF');
     plt.title('Normalized tandem t-DCF')
     plt.plot([np.min(CM_thresholds), np.max(CM_thresholds)], [1, 1], '--', color='black')
     plt.legend(('t-DCF', 'min t-DCF ({:.5f})'.format(min_tDCF), 'Arbitrarily bad CM (Norm t-DCF=1)'))
     plt.xlim([np.min(CM_thresholds), np.max(CM_thresholds)])
     plt.ylim([0, 1.5])
-    plt.savefig(cm_score_file[:-4]+'2.png')
 
     plt.show()
-
-    return min(eer_cm, other_eer_cm), min_tDCF
-
diff --git a/evaluation_metrics.py b/evaluation_metrics.py
@@ -10,10 +10,12 @@ def obtain_asv_error_rates(tar_asv, non_asv, spoof_asv, asv_threshold):
     # Rate of rejecting spoofs in ASV
     if spoof_asv.size == 0:
         Pmiss_spoof_asv = None
+        Pfa_spoof_asv = None
     else:
         Pmiss_spoof_asv = np.sum(spoof_asv < asv_threshold) / spoof_asv.size
+        Pfa_spoof_asv = np.sum(spoof_asv >= asv_threshold) / spoof_asv.size
 
-    return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv
+    return Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, Pfa_spoof_asv
 
 
 def compute_det_curve(target_scores, nontarget_scores):
@@ -46,7 +48,154 @@ def compute_eer(target_scores, nontarget_scores):
     return eer, thresholds[min_index]
 
 
-def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, print_cost):
+def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pfa_spoof_asv, cost_model, print_cost):
+    """
+    Compute Tandem Detection Cost Function (t-DCF) [1] for a fixed ASV system.
+    In brief, t-DCF returns a detection cost of a cascaded system of this form,
+
+      Speech waveform -> [CM] -> [ASV] -> decision
+
+    where CM stands for countermeasure and ASV for automatic speaker
+    verification. The CM is therefore used as a 'gate' to decide whether or
+    not the input speech sample should be passed onwards to the ASV system.
+    Generally, both CM and ASV can make detection errors. Not all those errors
+    are necessarily equally costly, and not all types of users are necessarily
+    equally likely. The tandem t-DCF gives a principled way to compare
+    different spoofing countermeasures under a detection cost function
+    framework that takes that information into account.
+
+    INPUTS:
+
+      bonafide_score_cm   A vector of POSITIVE CLASS (bona fide or human)
+                          detection scores obtained by executing a spoofing
+                          countermeasure (CM) on some positive evaluation trials.
+                          Each trial represents a bona fide case.
+      spoof_score_cm      A vector of NEGATIVE CLASS (spoofing attack)
+                          detection scores obtained by executing a spoofing
+                          CM on some negative evaluation trials.
+      Pfa_asv             False alarm (false acceptance) rate of the ASV
+                          system that is evaluated in tandem with the CM.
+                          Assumed to be in fractions, not percentages.
+      Pmiss_asv           Miss (false rejection) rate of the ASV system that
+                          is evaluated in tandem with the spoofing CM.
+                          Assumed to be in fractions, not percentages.
+      Pfa_spoof_asv       False alarm rate of spoof samples of the ASV system
+                          that is evaluated in tandem with the spoofing CM.
+                          That is, the fraction of spoof samples that were
+                          accepted by the ASV system.
+      cost_model          A struct that contains the parameters of t-DCF,
+                          with the following fields.
+
+                          Ptar        Prior probability of target speaker.
+                          Pnon        Prior probability of nontarget speaker (zero-effort impostor)
+                          Pspoof      Prior probability of spoofing attack.
+                          Cmiss       Cost of tandem system falsely rejecting target speaker.
+                          Cfa         Cost of tandem system falsely accepting nontarget speaker.
+                          Cfa_spoof   Cost of tandem system falsely accepting spoof.
+
+      print_cost          Print a summary of the cost parameters and the
+                          implied t-DCF cost function?
+
+    OUTPUTS:
+
+      tDCF_norm           Normalized t-DCF curve across the different CM
+                          system operating points; see [2] for more details.
+                          Normalized t-DCF > 1 indicates a useless
+                          countermeasure (as the tandem system would do
+                          better without it). min(tDCF_norm) will be the
+                          minimum t-DCF used in ASVspoof 2019 [2].
+      CM_thresholds       Vector of same size as tDCF_norm corresponding to
+                          the CM threshold (operating point).
+
+    NOTE:
+    o     In relative terms, higher detection scores values are assumed to
+          indicate stronger support for the bona fide hypothesis.
+    o     You should provide real-valued soft scores, NOT hard decisions. The
+          recommendation is that the scores are log-likelihood ratios (LLRs)
+          from a bonafide-vs-spoof hypothesis based on some statistical model.
+          This, however, is NOT required. The scores can have arbitrary range
+          and scaling.
+    o     Pfa_asv, Pmiss_asv, Pfa_spoof_asv are in fractions, not percentages.
+
+    References:
+
+      [1] T. Kinnunen, H. Delgado, N. Evans, K.-A. Lee, V. Vestman,
+          A. Nautsch, M. Todisco, X. Wang, M. Sahidullah, J. Yamagishi,
+          and D.-A. Reynolds, "Tandem Assessment of Spoofing Countermeasures
+          and Automatic Speaker Verification: Fundamentals," IEEE/ACM Transactions on
+          Audio, Speech and Language Processing (TASLP).
+
+      [2] ASVspoof 2019 challenge evaluation plan
+          https://www.asvspoof.org/asvspoof2019/asvspoof2019_evaluation_plan.pdf
+    """
+
+
+    # Sanity check of cost parameters
+    if cost_model['Cfa'] < 0 or cost_model['Cmiss'] < 0 or \
+            cost_model['Cfa'] < 0 or cost_model['Cmiss'] < 0:
+        print('WARNING: Usually the cost values should be positive!')
+
+    if cost_model['Ptar'] < 0 or cost_model['Pnon'] < 0 or cost_model['Pspoof'] < 0 or \
+            np.abs(cost_model['Ptar'] + cost_model['Pnon'] + cost_model['Pspoof'] - 1) > 1e-10:
+        sys.exit('ERROR: Your prior probabilities should be positive and sum up to one.')
+
+    # Unless we evaluate worst-case model, we need to have some spoof tests against asv
+    if Pfa_spoof_asv is None:
+        sys.exit('ERROR: you should provide false alarm rate of spoof tests against your ASV system.')
+
+    # Sanity check of scores
+    combined_scores = np.concatenate((bonafide_score_cm, spoof_score_cm))
+    if np.isnan(combined_scores).any() or np.isinf(combined_scores).any():
+        sys.exit('ERROR: Your scores contain nan or inf.')
+
+    # Sanity check that inputs are scores and not decisions
+    n_uniq = np.unique(combined_scores).size
+    if n_uniq < 3:
+        sys.exit('ERROR: You should provide soft CM scores - not binary decisions')
+
+    # Obtain miss and false alarm rates of CM
+    Pmiss_cm, Pfa_cm, CM_thresholds = compute_det_curve(bonafide_score_cm, spoof_score_cm)
+
+    # Constants - see ASVspoof 2019 evaluation plan
+
+    C0 = cost_model['Ptar'] * cost_model['Cmiss'] * Pmiss_asv + cost_model['Pnon']*cost_model['Cfa']*Pfa_asv
+    C1 = cost_model['Ptar'] * cost_model['Cmiss'] - (cost_model['Ptar'] * cost_model['Cmiss'] * Pmiss_asv + cost_model['Pnon'] * cost_model['Cfa'] * Pfa_asv)
+    C2 = cost_model['Pspoof'] * cost_model['Cfa_spoof'] * Pfa_spoof_asv;
+
+
+    # Sanity check of the weights
+    if C0 < 0 or C1 < 0 or C2 < 0:
+        sys.exit('You should never see this error but I cannot evalute tDCF with negative weights - please check whether your ASV error rates are correctly computed?')
+
+    # Obtain t-DCF curve for all thresholds
+    tDCF = C0 + C1 * Pmiss_cm + C2 * Pfa_cm
+
+    # Obtain default t-DCF
+    tDCF_default = C0 + np.minimum(C1, C2)
+
+    # Normalized t-DCF
+    tDCF_norm = tDCF / tDCF_default
+
+    # Everything should be fine if reaching here.
+    if print_cost:
+
+        print('t-DCF evaluation from [Nbona={}, Nspoof={}] trials\n'.format(bonafide_score_cm.size, spoof_score_cm.size))
+        print('t-DCF MODEL')
+        print('   Ptar         = {:8.5f} (Prior probability of target user)'.format(cost_model['Ptar']))
+        print('   Pnon         = {:8.5f} (Prior probability of nontarget user)'.format(cost_model['Pnon']))
+        print('   Pspoof       = {:8.5f} (Prior probability of spoofing attack)'.format(cost_model['Pspoof']))
+        print('   Cfa          = {:8.5f} (Cost of tandem system falsely accepting a nontarget)'.format(cost_model['Cfa']))
+        print('   Cmiss        = {:8.5f} (Cost of tandem system falsely rejecting target speaker)'.format(cost_model['Cmiss']))
+        print('   Cfa_spoof    = {:8.5f} (Cost of tandem sysmte falsely accepting spoof)'.format(cost_model['Cfa_spoof']))
+        print('\n   Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), t_CM=CM threshold)')
+        print('   tDCF_norm(t_CM) = {:8.5f} + {:8.5f} x Pmiss_cm(t_CM) + {:8.5f} x Pfa_cm(t_CM)\n'.format(C0/tDCF_default, C1/tDCF_default, C2/tDCF_default))
+        print('     * The optimum value is given by the first term (0.06273). This is the normalized t-DCF obtained with an error-free CM system.')
+        print('     * The minimum normalized cost (minimum over all possible thresholds) is always <= 1.00.')
+        print('')
+
+    return tDCF_norm, CM_thresholds
+
+def compute_tDCF_legacy(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pmiss_spoof_asv, cost_model, print_cost):
     """
     Compute Tandem Detection Cost Function (t-DCF) [1] for a fixed ASV system.
     In brief, t-DCF returns a detection cost of a cascaded system of this form,
@@ -126,7 +275,7 @@ def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pmiss_sp
           France, June 2018 (https://www.isca-speech.org/archive/Odyssey_2018/pdfs/68.pdf)
 
       [2] ASVspoof 2019 challenge evaluation plan
-          TODO: <add link
+          https://www.asvspoof.org/asvspoof2019/asvspoof2019_evaluation_plan.pdf
     """
 
 
@@ -175,20 +324,19 @@ def compute_tDCF(bonafide_score_cm, spoof_score_cm, Pfa_asv, Pmiss_asv, Pmiss_sp
     if print_cost:
 
         print('t-DCF evaluation from [Nbona={}, Nspoof={}] trials\n'.format(bonafide_score_cm.size, spoof_score_cm.size))
-        # print('t-DCF MODEL')
-        # print('   Ptar         = {:8.5f} (Prior probability of target user)'.format(cost_model['Ptar']))
-        # print('   Pnon         = {:8.5f} (Prior probability of nontarget user)'.format(cost_model['Pnon']))
-        # print('   Pspoof       = {:8.5f} (Prior probability of spoofing attack)'.format(cost_model['Pspoof']))
-        # print('   Cfa_asv      = {:8.5f} (Cost of ASV falsely accepting a nontarget)'.format(cost_model['Cfa_asv']))
-        # print('   Cmiss_asv    = {:8.5f} (Cost of ASV falsely rejecting target speaker)'.format(cost_model['Cmiss_asv']))
-        # print('   Cfa_cm       = {:8.5f} (Cost of CM falsely passing a spoof to ASV system)'.format(cost_model['Cfa_cm']))
-        # print('   Cmiss_cm     = {:8.5f} (Cost of CM falsely blocking target utterance which never reaches ASV)'.format(cost_model['Cmiss_cm']))
-        # print('\n   Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), s=CM threshold)')
+        print('t-DCF MODEL')
+        print('   Ptar         = {:8.5f} (Prior probability of target user)'.format(cost_model['Ptar']))
+        print('   Pnon         = {:8.5f} (Prior probability of nontarget user)'.format(cost_model['Pnon']))
+        print('   Pspoof       = {:8.5f} (Prior probability of spoofing attack)'.format(cost_model['Pspoof']))
+        print('   Cfa_asv      = {:8.5f} (Cost of ASV falsely accepting a nontarget)'.format(cost_model['Cfa_asv']))
+        print('   Cmiss_asv    = {:8.5f} (Cost of ASV falsely rejecting target speaker)'.format(cost_model['Cmiss_asv']))
+        print('   Cfa_cm       = {:8.5f} (Cost of CM falsely passing a spoof to ASV system)'.format(cost_model['Cfa_cm']))
+        print('   Cmiss_cm     = {:8.5f} (Cost of CM falsely blocking target utterance which never reaches ASV)'.format(cost_model['Cmiss_cm']))
+        print('\n   Implied normalized t-DCF function (depends on t-DCF parameters and ASV errors), s=CM threshold)')
 
         if C2 == np.minimum(C1, C2):
             print('   tDCF_norm(s) = {:8.5f} x Pmiss_cm(s) + Pfa_cm(s)\n'.format(C1 / C2))
         else:
             print('   tDCF_norm(s) = Pmiss_cm(s) + {:8.5f} x Pfa_cm(s)\n'.format(C2 / C1))
 
     return tDCF_norm, CM_thresholds
-