Skip to content

Commit

Permalink
feat: add levenshtein distance calculator
Browse files Browse the repository at this point in the history
  • Loading branch information
Soptq committed Jan 24, 2024
1 parent 7474cca commit bf56b57
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/searching/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ The binary search algorithm is a simple search in an ordered array-like compound
## [Dijkstra](./src/dijkstra.cairo)

Dijkstra's algorithm is a graph search algorithm that finds the shortest path from a source node to all other nodes in a weighted graph, ensuring the shortest distances are progressively updated as it explores nodes. It maintains a priority queue of nodes based on their tentative distances from the source and greedily selects the node with the smallest distance at each step.

## [Levenshtein distance](./src/levenshtein_distance.cairo)

The Levenshtein distance is a string metric for measuring the difference between two sequences. It is the minimum number of single-character edits (insertions, deletions, or substitutions) required to change one string into the other. This version of the algorithm optimizes the space complexity. Time complexity: O(nm). Space complexity: O(n).
68 changes: 68 additions & 0 deletions src/searching/src/levenshtein_distance.cairo
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// The Levenshtein Distance
use dict::Felt252DictTrait;


/// Computes the Levenshtein (edit) distance between two byte arrays.
///
/// The distance is the minimum number of single-byte insertions, deletions
/// and substitutions needed to turn one array into the other. Only one row
/// of the dynamic-programming table is stored: a dictionary with
/// `arr1.len() + 1` keys that is overwritten in place for every byte of `arr2`.
///
/// # Arguments
/// * `arr1` - The first byte array.
/// * `arr2` - The second byte array.
/// # Returns
/// * `usize` - The edit distance between the two byte arrays.
fn levenshtein_distance(arr1: @ByteArray, arr2: @ByteArray) -> usize {
    let m = arr1.len();
    let n = arr2.len();

    // When one side is empty the distance is simply the length of the other
    // side, so there is no need to build the row at all.
    if m == 0 {
        return n;
    }
    if n == 0 {
        return m;
    }

    // Row for the empty prefix of `arr2`: entry `j` is the cost of reaching
    // the first `j` bytes of `arr1` from nothing, i.e. `j` edits.
    let mut prev_dist = felt252_dict_new::<usize>();
    let mut init_index: usize = 0;
    loop {
        if init_index == m + 1 {
            break;
        }
        prev_dist.insert(init_index.into(), init_index);
        init_index += 1;
    };

    let mut row: usize = 0;
    loop {
        if row == n {
            break;
        }
        let c2 = arr2.at(row).unwrap();

        // `diagonal_cost` is the previous row's value one column to the left
        // of the cell being computed; `left_cost` is the current row's value
        // one column to the left. Both are carried in locals so that each cell
        // needs a single dictionary read.
        let mut diagonal_cost = prev_dist.get(0);
        let mut left_cost = row + 1;
        prev_dist.insert(0, left_cost);

        let mut col: usize = 0;
        loop {
            if col == m {
                break;
            }
            let c1 = arr1.at(col).unwrap();

            // Still the previous row's value here: this slot is only
            // overwritten at the end of the iteration.
            let above_cost = prev_dist.get((col + 1).into());

            // Equal bytes extend the diagonal for free; otherwise it costs
            // one substitution.
            let substitution_cost = if c1 == c2 {
                diagonal_cost
            } else {
                diagonal_cost + 1
            };

            let mut min_cost = left_cost + 1;
            if above_cost + 1 < min_cost {
                min_cost = above_cost + 1;
            }
            if substitution_cost < min_cost {
                min_cost = substitution_cost;
            }
            prev_dist.insert((col + 1).into(), min_cost);

            // Slide the window one column to the right.
            diagonal_cost = above_cost;
            left_cost = min_cost;
            col += 1;
        };

        row += 1;
    };

    // After the last row, the final entry covers both full arrays.
    prev_dist.get(m.into())
}
1 change: 1 addition & 0 deletions src/searching/src/lib.cairo
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mod binary_search;
mod dijkstra;
mod levenshtein_distance;

#[cfg(test)]
mod tests;
1 change: 1 addition & 0 deletions src/searching/src/tests.cairo
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
mod binary_search_test;
mod dijkstra_test;
mod levenshtein_distance_test;
114 changes: 114 additions & 0 deletions src/searching/src/tests/levenshtein_distance_test.cairo
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
use alexandria_searching::levenshtein_distance::levenshtein_distance;


#[test]
#[available_gas(5000000)]
fn bm_search_test_1() {
    // "DOG" as raw bytes: 0x44 0x4f 0x47
    let mut dog: ByteArray = Default::default();
    dog.append_byte(0x44_u8);
    dog.append_byte(0x4f_u8);
    dog.append_byte(0x47_u8);

    // "FROG" as raw bytes: 0x46 0x52 0x4f 0x47
    let mut frog: ByteArray = Default::default();
    frog.append_byte(0x46_u8);
    frog.append_byte(0x52_u8);
    frog.append_byte(0x4f_u8);
    frog.append_byte(0x47_u8);

    // Two edits are expected between the two words.
    let expected: usize = 2;
    assert(levenshtein_distance(@frog, @dog) == expected, 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_2() {
    // Two empty inputs: nothing to edit.
    let first: ByteArray = Default::default();
    let second: ByteArray = Default::default();

    let expected: usize = 0;
    assert(levenshtein_distance(@first, @second) == expected, 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_3() {
    // Empty first input against the single byte 0x61 ("a").
    let nothing: ByteArray = Default::default();
    let mut single: ByteArray = Default::default();
    single.append_byte(0x61_u8);

    let expected: usize = 1;
    assert(levenshtein_distance(@nothing, @single) == expected, 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_4() {
    // The single byte 0x61 ("a") against an empty second input.
    let nothing: ByteArray = Default::default();
    let mut single: ByteArray = Default::default();
    single.append_byte(0x61_u8);

    let expected: usize = 1;
    assert(levenshtein_distance(@single, @nothing) == expected, 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_5() {
    // Shorter input: the single byte 0x61 ("a").
    let mut shorter: ByteArray = Default::default();
    shorter.append_byte(0x61_u8);

    // Longer input: 0x61 0x62 ("ab").
    let mut longer: ByteArray = Default::default();
    longer.append_byte(0x61_u8);
    longer.append_byte(0x62_u8);

    let expected: usize = 1;
    assert(levenshtein_distance(@longer, @shorter) == expected, 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_6() {
    // Both inputs spell "foobar": 0x66 0x6f 0x6f 0x62 0x61 0x72
    let mut left: ByteArray = Default::default();
    let mut right: ByteArray = Default::default();

    left.append_byte(0x66_u8);
    left.append_byte(0x6f_u8);
    left.append_byte(0x6f_u8);
    left.append_byte(0x62_u8);
    left.append_byte(0x61_u8);
    left.append_byte(0x72_u8);

    right.append_byte(0x66_u8);
    right.append_byte(0x6f_u8);
    right.append_byte(0x6f_u8);
    right.append_byte(0x62_u8);
    right.append_byte(0x61_u8);
    right.append_byte(0x72_u8);

    // Identical contents must be zero edits apart.
    let expected: usize = 0;
    assert(levenshtein_distance(@left, @right) == expected, 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_7() {
    // "barfoo": 0x62 0x61 0x72 0x66 0x6f 0x6f
    let mut barfoo: ByteArray = Default::default();
    barfoo.append_byte(0x62_u8);
    barfoo.append_byte(0x61_u8);
    barfoo.append_byte(0x72_u8);
    barfoo.append_byte(0x66_u8);
    barfoo.append_byte(0x6f_u8);
    barfoo.append_byte(0x6f_u8);

    // "foobar": 0x66 0x6f 0x6f 0x62 0x61 0x72
    let mut foobar: ByteArray = Default::default();
    foobar.append_byte(0x66_u8);
    foobar.append_byte(0x6f_u8);
    foobar.append_byte(0x6f_u8);
    foobar.append_byte(0x62_u8);
    foobar.append_byte(0x61_u8);
    foobar.append_byte(0x72_u8);

    // Same bytes with the two halves swapped.
    let expected: usize = 6;
    assert(levenshtein_distance(@foobar, @barfoo) == expected, 'invalid result');
}

0 comments on commit bf56b57

Please sign in to comment.