From fa24a237a9b22c53372b3502f19560b55a3fc67e Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Fri, 6 Sep 2019 08:57:25 -0700 Subject: [PATCH 1/3] use all bits of dt in the hash --- audfprint_analyze.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/audfprint_analyze.py b/audfprint_analyze.py index 3a73ec3..85b53a4 100644 --- a/audfprint_analyze.py +++ b/audfprint_analyze.py @@ -140,7 +140,7 @@ def __init__(self, density=DENSITY): # min time separation (traditionally 1, upped 2014-08-04) self.mindt = 2 # max lookahead in time (LIMITED TO <64 IN LANDMARK2HASH) - self.targetdt = 63 + self.targetdt = 65 # global stores duration of most recently-read soundfile self.soundfiledur = 0.0 # .. and total amount of sound processed @@ -329,7 +329,7 @@ def peaks2landmarks(self, pklist): for peak in peaks_at[col]: pairsthispeak = 0 for col2 in range(col + self.mindt, - min(scols, col + self.targetdt)): + min(scols, col + self.targetdt + 1)): if pairsthispeak < self.maxpairsperpeak: for peak2 in peaks_at[col2]: if abs(peak2 - peak) < self.targetdf: @@ -337,7 +337,7 @@ def peaks2landmarks(self, pklist): if pairsthispeak < self.maxpairsperpeak: # We have a pair! 
landmarks.append((col, peak, - peak2, col2 - col)) + peak2, col2 - col - self.mindt)) pairsthispeak += 1 return landmarks From d76fb9e992573fdacd00dc3ed0f31f99e329beb0 Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Fri, 6 Sep 2019 08:58:11 -0700 Subject: [PATCH 2/3] use all bits of df in the hash --- audfprint_analyze.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/audfprint_analyze.py b/audfprint_analyze.py index 85b53a4..4a8aff9 100644 --- a/audfprint_analyze.py +++ b/audfprint_analyze.py @@ -135,8 +135,8 @@ def __init__(self, density=DENSITY): # Limit the num of pairs we'll make from each peak (Fanout) self.maxpairsperpeak = 3 # Values controlling peaks2landmarks - # +/- 31 bins in freq (LIMITED TO -32..31 IN LANDMARK2HASH) - self.targetdf = 31 + # -32..+31 bins in freq, upper bound exclusive (LIMITED TO -32..31 IN LANDMARK2HASH) + self.targetdf = 32 # min time separation (traditionally 1, upped 2014-08-04) self.mindt = 2 # max lookahead in time (LIMITED TO <64 IN LANDMARK2HASH) @@ -332,7 +332,8 @@ def peaks2landmarks(self, pklist): min(scols, col + self.targetdt + 1)): if pairsthispeak < self.maxpairsperpeak: for peak2 in peaks_at[col2]: - if abs(peak2 - peak) < self.targetdf: + df = peak2 - peak + if -self.targetdf <= df and df < self.targetdf: # and abs(peak2-peak) + abs(col2-col) > 2 ): if pairsthispeak < self.maxpairsperpeak: # We have a pair! 
From 393e3595a21611b2fa2cc0c71f10775b764312f7 Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Fri, 6 Sep 2019 09:03:26 -0700 Subject: [PATCH 3/3] compute two additional fft bins, then discard the bottom and top bin --- audfprint_analyze.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/audfprint_analyze.py b/audfprint_analyze.py index 4a8aff9..234bdb3 100644 --- a/audfprint_analyze.py +++ b/audfprint_analyze.py @@ -277,7 +277,7 @@ def find_peaks(self, d, sr): a_dec = (1 - 0.01 * (self.density * np.sqrt(self.n_hop / 352.8) / 35)) ** (1 / OVERSAMP) # Take spectrogram mywin = np.hanning(self.n_fft + 2)[1:-1] - sgram = np.abs(stft.stft(d, n_fft=self.n_fft, + sgram = np.abs(stft.stft(d, n_fft=self.n_fft+2, hop_length=self.n_hop, window=mywin)) sgrammax = np.max(sgram) @@ -289,10 +289,10 @@ def find_peaks(self, d, sr): # zero. Not good, but let's let it through for now. print("find_peaks: Warning: input signal is identically zero.") # High-pass filter onset emphasis - # [:-1,] discards top bin (nyquist) of sgram so bins fit in 8 bits + # [1:-1,] discards bottom (0hz) and top (nyquist) bins of sgram so bins fit in 8 bits sgram = np.array([scipy.signal.lfilter([1, -1], [1, -HPF_POLE ** (1 / OVERSAMP)], s_row) - for s_row in sgram])[:-1, ] + for s_row in sgram])[1:-1, ] # Prune to keep only local maxima in spectrum that appear above an online, # decaying threshold peaks = self._decaying_threshold_fwd_prune(sgram, a_dec)