From 09cc81f42501d9c92f22265e64db2cca6a7ed342 Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Tue, 18 Jun 2019 19:45:02 +0200 Subject: [PATCH] binary-search fixes: - always use the NEEDLE as the first argument for the comparison function. Otherwise you get very nasty comparisons (which don't hurt, but are utterly useless) like these in *indexed maps* at least: watch for the compareValue/compareValue2 = **NaN** log lines below: this is due to a useless (due to way it was invoked!) call to `aCompare` at the end of binary-search. The log below is a heavily instrumented `originalPositionFor()` API test run which was failing for indexed maps. ``` originalPositionFor: { generatedLine: 2, generatedColumn: 3 } # search round 1 binarySearch SECTIONS: { needle: { generatedLine: 2, generatedColumn: 3 }, 'section.generatedOffset': { generatedLine: 1, generatedColumn: 1 }, compareCheck: 1, compareValue2: 3 } # search round 2 binarySearch SECTIONS: { needle: { generatedLine: 2, generatedColumn: 3 }, 'section.generatedOffset': { generatedLine: 1, generatedColumn: 3 }, compareCheck: 1, compareValue2: 1 } # while loop round 1; always fails thanks to NaNs. # That ain't no needle being fed to it! binarySearch SECTIONS: { needle: { generatedOffset: { generatedLine: 1, generatedColumn: 3 }, consumer: BasicSourceMapConsumer { _sourceLookupCache: Map {}, _names: [Object], _sources: [Object], _absoluteSources: [Object], sourceRoot: null, sourcesContent: null, _mappings: 'AAAAA,CCCCC', _sourceMapURL: undefined, file: null, _computedColumnSpans: false, _mappingsPtr: 1114216, _wasm: [Object] } }, 'section.generatedOffset': { generatedLine: 1, generatedColumn: 1 }, compareCheck: NaN, compareValue2: NaN } binarySearch SECTIONS --> found @ index: { sectionIndex: 1, section: { generatedOffset: { generatedLine: 1, generatedColumn: 3 }, consumer: BasicSourceMapConsumer { _sourceLookupCache: Map {}, _names: [Object], _sources: [Object], _absoluteSources: [Object], sourceRoot: null, sourcesContent: null, _mappings: 'AAAAA,CCCCC', _sourceMapURL: undefined, file: null, _computedColumnSpans: false, _mappingsPtr: 1114216, _wasm: [Object] } } } # and this is what the API gives back. Unexpected, very probably wrong, # but that's for another PR. The key element here is the faulty use/code # of aCompare/search. # # 'rv' = Return Value originalPositionFor: { line: 2, column: 3, rv: { source: null, line: null, column: null, name: null } } ``` - kill the fixed 3rd *undocumented* argument for the compare function. It's been with us since (SHA-1: 5241b06524851811ac849386797aaf2dd6104a76 * Always return the smallest element when there is more than one match) and has no function / is **unused**. --- lib/binary-search.js | 7 ++++--- test/test-binary-search.js | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/lib/binary-search.js b/lib/binary-search.js index d6f898ea..3d8f6003 100644 --- a/lib/binary-search.js +++ b/lib/binary-search.js @@ -32,7 +32,7 @@ function recursiveSearch(aLow, aHigh, aNeedle, aHaystack, aCompare, aBias) { // 3. We did not find the exact element, and there is no next-closest // element than the one we are searching for, so we return -1. const mid = Math.floor((aHigh - aLow) / 2) + aLow; - const cmp = aCompare(aNeedle, aHaystack[mid], true); + const cmp = aCompare(aNeedle, aHaystack[mid]); if (cmp === 0) { // Found the element we are looking for. return mid; @@ -95,9 +95,10 @@ exports.search = function search(aNeedle, aHaystack, aCompare, aBias) { // We have found either the exact element, or the next-closest element than // the one we are searching for. However, there may be more than one such - // element. Make sure we always return the smallest of these. + // element ONLY IFF we found an exact match. + // Make sure we always return the smallest of these. while (index - 1 >= 0) { - if (aCompare(aHaystack[index], aHaystack[index - 1], true) !== 0) { + if (aCompare(aNeedle, aHaystack[index - 1]) !== 0) { break; } --index; diff --git a/test/test-binary-search.js b/test/test-binary-search.js index 5a31aea7..4b62c2f5 100644 --- a/test/test-binary-search.js +++ b/test/test-binary-search.js @@ -94,3 +94,29 @@ exports["test multiple matches at the beginning"] = function(assert) { assert.equal(binarySearch.search(needle, haystack, numberCompare, binarySearch.LEAST_UPPER_BOUND), 0); }; + +exports["test fuzzy match with duplicates in the data at match point"] = function(assert) { + const needle = 2; + const haystack = [1, 1, 5, 5, 5, 5, 13, 21]; + + assert.equal( + binarySearch.search( + needle, + haystack, + numberCompare, + binarySearch.LEAST_UPPER_BOUND + ), + 2 + ); + + assert.equal( + binarySearch.search( + needle, + haystack, + numberCompare, + binarySearch.GREATEST_LOWER_BOUND + ), + 1 + ); +}; +