diff --git a/src/api/util.cpp b/src/api/util.cpp index efbd4945..0ca5020a 100644 --- a/src/api/util.cpp +++ b/src/api/util.cpp @@ -194,8 +194,10 @@ vec2 gridpp::calc_quantile(const vec3& array, const vec2& quantile) { if(X == 0) return vec2(); int T = array[0][0].size(); - if(T == 0) - return vec2(); + if(T == 0) { + vec2 output = gridpp::init_vec2(Y, X, gridpp::MV); + return output; + } vec2 output = gridpp::init_vec2(Y, X); for(int y = 0; y < Y; y++) { for(int x = 0; x < X; x++) { @@ -257,48 +259,76 @@ double gridpp::clock() { return sec + msec/1e6; } vec gridpp::calc_even_quantiles(const vec& values, int num) { - if(num >= values.size()) - return values; - vec quantiles; - if(num == 0) + int size = values.size(); + if(num == 0 || size == 0) return quantiles; vec sorted = values; std::sort(sorted.begin(), sorted.end()); - quantiles.reserve(num); - quantiles.push_back(sorted[0]); + // Asking for more quantiles than we have data points + if(num >= size) { + // Find the unique values + quantiles.reserve(num); + quantiles.push_back(sorted[0]); + for(int i = 1; i < size; i++) { + if(sorted[i] != sorted[i-1]) + quantiles.push_back(sorted[i]); + } + return quantiles; + } + + + float lowest = sorted[0]; + float highest = sorted[size - 1]; + + // If there are multiple identical values at the start, pick the first value past the set of + // identical values int count_lower = 0; for(int i = 0; i < sorted.size(); i++) { - if(sorted[i] != quantiles[0]) + if(sorted[i] != lowest) break; count_lower++; } - int size = sorted.size(); - if(count_lower > size / num) + quantiles.reserve(num); + quantiles.push_back(lowest); + float last_added = lowest; + if(num == 2) { + if(lowest != highest) + quantiles.push_back(highest); + return quantiles; + } + + // Add the first past the lowest set of repeated values + bool repeated_at_beginning = count_lower < size && count_lower > size / num; + if(repeated_at_beginning) { + assert(count_lower < sorted.size()); quantiles.push_back(sorted[count_lower]); + } + last_added = quantiles[quantiles.size() - 1]; // Remove duplicates - vec values_unique; - values_unique.reserve(sorted.size()); - float last_quantile = quantiles[quantiles.size() - 1]; + vec remaining_unique_values; + remaining_unique_values.reserve(sorted.size()); for(int i = 0; i < sorted.size(); i++) { - if(sorted[i] > last_quantile && (values_unique.size() == 0 || sorted[i] != values_unique[values_unique.size() - 1])) - values_unique.push_back(sorted[i]); + if(sorted[i] > last_added && (remaining_unique_values.size() == 0 || sorted[i] != remaining_unique_values[remaining_unique_values.size() - 1])) + remaining_unique_values.push_back(sorted[i]); } - if(values_unique.size() > 0) { + + if(remaining_unique_values.size() > 0) { int num_left = num - quantiles.size(); for(int i = 1; i <= num_left; i++) { float f = float(i) / (num_left); - int index = values_unique.size() * f - 1; + int index = remaining_unique_values.size() * f - 1; if(index >= 0) { - float value = values_unique[index]; + assert(index < remaining_unique_values.size()); + float value = remaining_unique_values[index]; quantiles.push_back(value); } else { - std::cout << i << " " << f << " " << index << " " << num_left << " " << values_unique.size() << std::endl; - std::cout << count_lower << " " << sorted.size() << " " << last_quantile << std::endl; + std::cout << i << " " << f << " " << index << " " << num_left << " " << remaining_unique_values.size() << std::endl; + std::cout << count_lower << " " << sorted.size() << " " << last_added << std::endl; throw std::runtime_error("Internal error in calc_even_quantiles."); } } diff --git a/tests/test_util.py b/tests/test_util.py index 278d508e..60e88932 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -78,11 +78,40 @@ def test_calc_quantile(self): self.assertEqual(len(quantile_of_nan_list), 1) self.assertTrue(np.isnan(quantile_of_nan_list[0])) + def test_calc_quantile_spatially_varying(self): + Q = 5 + Y = 3 + X = 2 + input = np.reshape(np.arange(Y * X * Q), [Y, X, Q]) + levels = np.reshape([0.25, 0.6]*int(Y*X/2), [Y, X]) + output = gridpp.calc_quantile(input, levels) + expected = np.reshape([1, 7.4, 11, 17.4, 21, 27.4], [Y, X]) + np.testing.assert_array_almost_equal(output, expected) + + def test_calc_quantile_spatially_varying_invalid_arguments(self): + with self.assertRaises(ValueError) as e: + # Dimension mismatch + gridpp.calc_quantile(np.zeros([2, 3, 4]), np.zeros([1,3])) + + def test_calc_quantile_spatially_varying_empty(self): + output = gridpp.calc_quantile(np.zeros([2, 0, 3]), np.zeros([2, 0])) + np.testing.assert_array_almost_equal(output.shape, [0, 0]) + + output = gridpp.calc_quantile(np.zeros([0, 2, 3]), np.zeros([0, 2])) + np.testing.assert_array_almost_equal(output.shape, [0, 0]) + + output = gridpp.calc_quantile(np.zeros([2, 2, 0]), np.zeros([2, 2])) + np.testing.assert_array_almost_equal(output, np.nan*np.zeros([2, 2])) + def test_calc_quantile_invalid_argument(self): quantiles = [1.1, -0.1] for quantile in quantiles: with self.assertRaises(ValueError) as e: gridpp.calc_quantile([0, 1, 2], quantile) + with self.assertRaises(ValueError) as e: + gridpp.calc_quantile([[0, 1, 2]], quantile) + with self.assertRaises(ValueError) as e: + gridpp.calc_quantile(np.zeros([2,3,4]), quantile*np.ones([2,3])) self.assertTrue(np.isnan(gridpp.calc_quantile([0, 1, 2], np.nan))) def test_calc_statistic_randomchoice(self): @@ -187,5 +216,61 @@ def test_set_debug_level(self): gridpp.set_debug_level(level) self.assertEqual(level, gridpp.get_debug_level()) + def test_debug(self): + gridpp.debug("test") + gridpp.debug("") + + def test_future_deprecation_warning(self): + gridpp.future_deprecation_warning("test") + gridpp.future_deprecation_warning("") + gridpp.future_deprecation_warning("test", "other") + gridpp.future_deprecation_warning("", "other") + gridpp.future_deprecation_warning("", "") + + def test_calc_even_quantiles(self): + output = gridpp.calc_even_quantiles([1,2,3], 2) + np.testing.assert_array_almost_equal(output, [1,3]) + + # First and last + output = gridpp.calc_even_quantiles(range(10), 2) + np.testing.assert_array_almost_equal(output, [0,9]) + + # Repeated first number + output = gridpp.calc_even_quantiles([1,1,1,1,1,5, 10], 3) + np.testing.assert_array_almost_equal(output, [1, 5, 10]) + + output = gridpp.calc_even_quantiles([1,1,1,1,1,5, 10], 2) + np.testing.assert_array_almost_equal(output, [1, 10]) + + output = gridpp.calc_even_quantiles([1,1,1,1,4,5, 10], 3) + np.testing.assert_array_almost_equal(output, [1, 4, 10]) + + # Repeated numbers + for num in [1, 2, 3]: + with self.subTest(num=num): + output = gridpp.calc_even_quantiles([1,1,1], num) + np.testing.assert_array_almost_equal(output, [1]) + + # Too little data + output = gridpp.calc_even_quantiles([1,2,3], 3) + np.testing.assert_array_almost_equal(output, [1,2,3]) + + # Too little data with repeated + output = gridpp.calc_even_quantiles([1,1,3], 3) + np.testing.assert_array_almost_equal(output, [1,3]) + + output = gridpp.calc_even_quantiles([1], 2) + np.testing.assert_array_almost_equal(output, [1]) + + # Empty arrays + output = gridpp.calc_even_quantiles([1,2,3], 0) + np.testing.assert_array_almost_equal(output, []) + + output = gridpp.calc_even_quantiles([], 0) + np.testing.assert_array_almost_equal(output, []) + + output = gridpp.calc_even_quantiles([], 2) + np.testing.assert_array_almost_equal(output, []) + if __name__ == '__main__': unittest.main()