mir-evaluation · cwitkowitz · Feb 13, 2021 · Feb 22, 2021 · Feb 23, 2021 · Mar 2, 2021
diff --git a/mir_eval/multipitch.py b/mir_eval/multipitch.py
@@ -125,6 +125,14 @@ def resample_multipitch(times, frequencies, target_times):
     if times.size == 0:
         return [np.array([])]*len(target_times)
 
+    # Warn when the delta between the original times is not constant
+    if not np.allclose(np.diff(times), np.diff(times).mean()):
+        warnings.warn(
+            "Non-uniform timescale passed to resample_multipitch.  Pitch "
+            "will be nearest-neighbor interpolated, which will result in "
+            "undesirable behavior if silences are indicated by missing values."
+            "  Silences should be indicated by empty arrays, i.e. np.array([]).")
+
     n_times = len(frequencies)
 
     # scipy's interpolate doesn't handle ragged arrays. Instead, we interpolate

diff --git a/mir_eval/util.py b/mir_eval/util.py
@@ -8,6 +8,7 @@
 import six
 
 import numpy as np
+import warnings
 
 
 def index_labels(labels, case_sensitive=False):
@@ -956,3 +957,89 @@ def midi_to_hz(midi):
         Frequency/frequencies in Hz corresponding to `midi`
     '''
     return 440.0 * (2.0 ** ((midi - 69.0)/12.0))
+
+
+def estimate_hop_length(times):
+    '''Estimate hop length of a semi-regular but non-uniform series of times.
+
+    The estimate is the median of those consecutive time differences which
+    match the difference immediately before them, so that isolated gaps
+    between otherwise regular observations do not influence the result.
+
+    Parameters
+    ----------
+    times : ndarray
+        Array of times corresponding to a time series
+
+    Returns
+    -------
+    hop_length : number
+        Estimated hop length (seconds)
+
+    Raises
+    ------
+    ValueError
+        If `times` is empty, if no two consecutive time differences agree,
+        or if the estimate is not positive (e.g. repeated time stamps)
+    '''
+
+    if not len(times):
+        raise ValueError("Cannot estimate hop length from an empty time array.")
+
+    # Sort first so that differences are taken between neighbors in time
+    deltas = np.diff(np.sort(times))
+
+    # A difference is a non-gap when it equals the previous difference; the
+    # first difference has no predecessor and is therefore never selected
+    non_gaps = np.append([False], np.isclose(np.diff(deltas), 0))
+
+    if not non_gaps.any():
+        raise ValueError("Time observations are too irregular.")
+
+    # Take the median of the time differences at non-gaps
+    hop_length = np.median(deltas[non_gaps])
+
+    # Repeated time stamps yield matching differences of zero; a hop length
+    # of zero cannot be used to lay out a uniform series
+    if hop_length <= 0:
+        raise ValueError("Time observations contain too many repeated values.")
+
+    return hop_length
+
+
+def time_series_to_uniform(times, values, hop_length=None, duration=None):
+    '''Convert a semi-regular time series with gaps into a uniform time series.
+
+    The uniform series starts at time zero and advances in steps of
+    `hop_length`.  Each observation is placed in the frame nearest to its
+    time, and frames without an observation hold an empty array.
+
+    Parameters
+    ----------
+    times : ndarray
+        Array of times corresponding to a time series
+    values : list of ndarray
+        Observations made at times
+    hop_length : number or None (optional)
+        Time interval (seconds) between each observation in the uniform series
+        If None, it is estimated from `times` and a warning is issued
+    duration : number or None (optional)
+        Total length (seconds) of times series
+        If specified, should be greater than all observation times;
+        observations which fall outside of the uniform series are dropped
+        If None, the series extends one hop past the latest observation
+
+    Returns
+    -------
+    times : ndarray
+        Uniform time array
+    values : list of ndarray
+        Observations corresponding to uniform times
+
+    Raises
+    ------
+    ValueError
+        If `times` and `values` differ in length, or if `hop_length` is
+        not positive
+    '''
+
+    times = np.asarray(times, dtype=float)
+
+    if len(times) != len(values):
+        raise ValueError("times and values must have the same length.")
+
+    # Without observations, a uniform series can only be laid out when both
+    # the hop length and the duration are given explicitly
+    if not len(times) and (hop_length is None or duration is None):
+        return np.array([]), []
+
+    if hop_length is None:
+        # If a hop length is not provided, estimate it and throw a warning
+        warnings.warn(
+            "Since hop length is unknown, it will be estimated. This may "
+            "lead to unwanted behavior if the observation times are sporadic "
+            "or irregular.")
+        hop_length = estimate_hop_length(times)
+
+    if hop_length <= 0:
+        raise ValueError("hop_length must be positive.")
+
+    if duration is None:
+        # Extend one hop past the latest observation so that its frame is
+        # part of the series; max() keeps this correct for unsorted times
+        num_hops = times.max() / hop_length + 1
+    else:
+        num_hops = duration / hop_length
+
+    # Ratios which are whole numbers up to floating point error (for example
+    # 0.07 / 0.01 == 7.000000000000001) must not gain an extra frame
+    nearest = np.round(num_hops)
+    if np.isclose(num_hops, nearest, rtol=1e-9, atol=1e-9):
+        num_hops = nearest
+
+    # Determine the total number of observations in the uniform time series
+    num_entries = max(int(np.ceil(num_hops)), 0)
+
+    # Start from blank frames, each holding its own empty array so that the
+    # frames do not alias one another
+    new_values = [np.array([]) for _ in range(num_entries)]
+    new_times = hop_length * np.arange(num_entries)
+
+    # Determine which indices the provided observations fall under
+    idcs = np.round(times / hop_length).astype(int)
+
+    # Fill the observed values into their respective locations, skipping
+    # observations which lie before or after the uniform series
+    for idx, value in zip(idcs, values):
+        if 0 <= idx < num_entries:
+            new_values[idx] = value
+
+    return new_times, new_values
diff --git a/tests/test_util.py b/tests/test_util.py
@@ -8,6 +8,8 @@
 import mir_eval
 from mir_eval import util
 
+A_TOL = 1e-12  # Absolute tolerance for float comparisons in the tests below
+
 
 def test_interpolate_intervals():
     """Check that an interval set is interpolated properly, with boundaries
@@ -338,3 +340,94 @@ def __test(x, x_true):
     yield __test, x1, x1_true
     yield __test_labeled, x1_true, labels_true, x1_true, labels_true
     yield __test, x1_true, x1_true
+
+
+def test_estimate_hop_length():
+    """Check hop length estimation on uniform, gapped and irregular times."""
+    expected_hop = 0.01
+
+    uniform = np.array([0.00, 0.01, 0.02, 0.03, 0.04])
+    gapped = np.concatenate((uniform, [0.10, 0.11, 0.12, 0.13, 0.14]))
+    irregular = np.array([0.00, 0.01, 0.03, 0.06, 0.10])
+
+    estimated_hops = [mir_eval.util.estimate_hop_length(times)
+                      for times in (uniform, gapped)]
+
+    # Compare within a tolerance, as NumPy diff() is not always exact
+    for actual_hop in estimated_hops:
+        assert abs(actual_hop - expected_hop) < A_TOL
+
+    # Every spacing in the irregular series differs from the one before it
+    nose.tools.assert_raises(ValueError, util.estimate_hop_length, irregular)
+
+
+def __times_equal(times_a, times_b):
+    """Return whether two sequences of times have the same length and agree
+    element by element to within A_TOL."""
+    if len(times_a) != len(times_b):
+        return False
+
+    return all(abs(time_a - time_b) < A_TOL
+               for time_a, time_b in zip(times_a, times_b))
+
+
+def __frequencies_equal(freqs_a, freqs_b):
+    """Return whether two lists of frequency arrays have the same length and
+    hold, frame by frame, arrays of equal size with close values."""
+    if len(freqs_a) != len(freqs_b):
+        return False
+
+    for frame_a, frame_b in zip(freqs_a, freqs_b):
+        # Sizes are compared first: an empty frame must not match a
+        # non-empty one
+        if frame_a.size != frame_b.size:
+            return False
+        if not np.allclose(frame_a, frame_b, atol=A_TOL):
+            return False
+
+    return True
+
+
+def test_time_series_to_uniform():
+    """Check conversion of time series, with and without gaps, into a
+    uniform time series for several durations."""
+    hop_size = 0.01
+
+    uniform_times = np.array([0.00, 0.01, 0.02, 0.03])
+    gapped_times = np.array([0.00, 0.01, 0.03, 0.04])
+
+    freqs = [np.array([100.]),
+             np.array([100.]),
+             np.array([200.]),
+             np.array([200.])]
+
+    # Expected observations when the frame at 0.02 has no observation
+    gapped_freqs = [np.array([100.]),
+                    np.array([100.]),
+                    np.array([]),
+                    np.array([200.]),
+                    np.array([200.])]
+
+    # Each case: (times, values, duration, expected times, expected values)
+    cases = [
+        # Series without gaps, duration left unspecified
+        (uniform_times, freqs, None,
+         uniform_times, freqs),
+        # Series with one gap, duration left unspecified
+        (gapped_times, freqs, None,
+         np.array([0.00, 0.01, 0.02, 0.03, 0.04]), gapped_freqs),
+        # Series with one gap and a duration beyond the last observation
+        (gapped_times, freqs, 0.195,
+         hop_size * np.arange(20), gapped_freqs + [np.array([])] * 15),
+        # No observations at all, but an explicit duration
+        (np.array([]), [], uniform_times[-1],
+         np.array([0.00, 0.01, 0.02]), [np.array([])] * 3),
+    ]
+
+    # Run every conversion before checking any of the results
+    results = [
+        mir_eval.util.time_series_to_uniform(times, values, hop_size, duration)
+        for times, values, duration, _, _ in cases
+    ]
+
+    for case, result in zip(cases, results):
+        expected_times, expected_freqs = case[3], case[4]
+        actual_times, actual_values = result
+
+        assert __times_equal(actual_times, expected_times)
+        assert __frequencies_equal(actual_values, expected_freqs)