Skip to content

Commit

Permalink
Improving levenshtein performance. Refs #84
Browse files Browse the repository at this point in the history
  • Loading branch information
Yomguithereal committed Jan 19, 2017
1 parent 03dce23 commit 2182a27
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .npmignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
node_modules
experiments
benchmark
.gitignore
.npmignore
*.yml
Expand Down
32 changes: 32 additions & 0 deletions benchmark/levenshtein.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
require('babel-core/register');

var levenshtein = require('../src/metrics/distance/levenshtein');
var leven = require('leven');

// Fixed corpus of string pairs fed to every benchmarked implementation,
// listed in the order in which they are evaluated.
var PAIRS = [
  ['a', 'b'],
  ['ab', 'ac'],
  ['ac', 'bc'],
  ['abc', 'axc'],
  ['kitten', 'sitting'],
  ['xabxcdxxefxgx', '1ab2cd34ef5g6'],
  ['cat', 'cow'],
  ['xabxcdxxefxgx', 'abcdefg'],
  ['javawasneat', 'scalaisgreat'],
  ['example', 'samples'],
  ['sturgeon', 'urgently'],
  ['levenshtein', 'frankenstein'],
  ['distance', 'difference'],
  ['abcde', 'tes'],
  ['因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文']
];

/**
 * Calls the given distance function once on every pair of the corpus.
 * Return values are discarded: only the calls themselves matter here.
 *
 * @param {function} fn - Function invoked as fn(a, b) for each pair.
 */
function run(fn) {
  for (var i = 0, l = PAIRS.length; i < l; i++)
    fn(PAIRS[i][0], PAIRS[i][1]);
}

// Registering one benchmark per implementation; each one runs the whole
// corpus of pairs through `run`.
suite('Levenshtein', function() {
  var implementations = [
    ['talisman', levenshtein],
    ['leven', leven]
  ];

  implementations.forEach(function(entry) {
    var label = entry[0],
        distance = entry[1];

    bench(label, function() {
      run(distance);
    });
  });
});
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@
"chai": "^3.5.0",
"csv": "^1.1.0",
"eslint": "^3.2.2",
"leven": "^2.0.0",
"matcha": "^0.7.0",
"mocha": "^3.0.2",
"rimraf": "^2.5.2",
"seedrandom": "^2.4.2"
Expand Down
88 changes: 88 additions & 0 deletions src/metrics/distance/levenshtein.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,95 @@
* @param {mixed} b - The second sequence to process.
* @return {number} - The Levenshtein distance between a & b.
*/
/**
 * Function computing the Levenshtein distance between two strings, using a
 * single-row dynamic programming table after trimming the prefix & suffix
 * shared by both strings.
 *
 * Characters are compared through `charCodeAt`, i.e. per UTF-16 code unit.
 *
 * @param  {string} a - The first string to process.
 * @param  {string} b - The second string to process.
 * @return {number}   - The Levenshtein distance between a & b.
 */
function levenshteinForStrings(a, b) {
  if (a === b)
    return 0;

  // Swapping the strings so that the shorter string is the first one.
  if (a.length > b.length) {
    const tmp = a;
    a = b;
    b = tmp;
  }

  let la = a.length,
      lb = b.length;

  // `a` being the shorter string, an empty `b` implies an empty `a`, hence
  // this single check covers both cases.
  if (!la)
    return lb;

  // Ignoring common suffix
  while (la > 0 && (a.charCodeAt(la - 1) === b.charCodeAt(lb - 1))) {
    la--;
    lb--;
  }

  if (!la)
    return lb;

  let start = 0;

  // Ignoring common prefix
  while (start < la && (a.charCodeAt(start) === b.charCodeAt(start)))
    start++;

  la -= start;
  lb -= start;

  if (!la)
    return lb;

  // Row of the table: before processing any character of `a`, the cost of
  // reaching the j + 1 first remaining characters of `b` is j + 1.
  const v0 = new Array(lb);

  let i = 0;

  while (i < lb)
    v0[i] = ++i;

  let current = 0;

  // Starting the nested loops
  for (i = 0; i < la; i++) {

    // Cell of the previous row, one column before (diagonal)
    let left = i;

    // Cell of the current row, column 0
    current = i + 1;
    const charA = a.charCodeAt(start + i);

    for (let j = 0; j < lb; j++) {

      // Cell of the current row, previous column
      const above = current;

      // NOTE: both strings must be offset by `start`, since the common
      // prefix was only skipped logically, not sliced off.
      const charB = b.charCodeAt(start + j);

      // Defaulting to the diagonal, which is the result on a match
      current = left;

      // Cell of the previous row, same column (next diagonal)
      left = v0[j];

      if (charA !== charB) {

        // Insertion
        if (left < current)
          current = left;

        // Deletion
        if (above < current)
          current = above;

        current++;
      }

      v0[j] = current;
    }
  }

  return current;
}

export default function levenshtein(a, b) {
if (typeof a === 'string')
return levenshteinForStrings(a, b);

if (a === b)
return 0;

Expand Down
1 change: 1 addition & 0 deletions src/structures/bk-tree.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ function searchWord(distance, tree, word, n, acc) {
if (d <= n)
acc.push(tree.word);

// NOTE: this can be improved by for in iteration!
for (let i = d - n, l = d + n + 1; i < l; i++) {
const children = tree.children[i];

Expand Down

0 comments on commit 2182a27

Please sign in to comment.