Skip to content

Commit

Permalink
feat: add Boyer-Moore algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
Soptq committed Jan 24, 2024
1 parent 7474cca commit 5c11922
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/searching/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Searching

## [Boyer-Moore algorithm](./src/bm_search.cairo)

The Boyer-Moore algorithm is a string-searching algorithm that finds the positions of a pattern in a text. It preprocesses the pattern to build lookup tables for two shift heuristics: the bad character rule and the good suffix rule. On a mismatch, the bad character rule shifts the pattern so that the mismatched text character lines up with its last occurrence in the pattern (or moves the pattern past that character if it does not occur in the pattern at all). The good suffix rule shifts the pattern so that the part of the text that already matched a suffix of the pattern lines up with another occurrence of that suffix inside the pattern. The implementation in this module uses the bad character rule only.

The Boyer-Moore algorithm has a best-case time complexity of O(n/m) and a worst-case time complexity of O(nm), where n is the length of the text and m is the length of the pattern. It is among the most efficient string-searching algorithms in practice.

## [Binary search](./src/binary_search.cairo)

The binary search algorithm is a simple search in an ordered array-like compound. It starts by comparing the value we are looking for to the middle element of the array. If it's not a match, the function calls itself recursively on the right or left half of the array until it either finds the value or determines that the value is not in the array.
Expand Down
72 changes: 72 additions & 0 deletions src/searching/src/bm_search.cairo
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// The Boyer-Moore string search algorithm
use dict::Felt252DictTrait;

/// Searches `text` for `pattern` with Boyer-Moore style bad-character shifts
/// and collects the start index of every match.
///
/// A dictionary maps each byte of the pattern to the 1-based index of its last
/// occurrence; a stored value of 0 therefore means "byte not in the pattern".
/// * `text` - The text to search in.
/// * `pattern` - The pattern to search for.
/// # Returns
/// * `Array<usize>` - The start index of every match, in ascending order.
///   Empty when either input is empty or the pattern is longer than the text.
fn bm_search(text: @ByteArray, pattern: @ByteArray) -> Array<usize> {
    let text_len = text.len();
    let pattern_len = pattern.len();
    let mut matches: Array<usize> = array![];
    if text_len == 0 || pattern_len == 0 || pattern_len > text_len {
        return matches;
    }

    // Bad-character table: byte -> (index of last occurrence in pattern) + 1.
    // The +1 keeps real entries distinct from the dictionary's default of 0.
    let mut last_seen = felt252_dict_new::<usize>();
    let mut i = 0;
    loop {
        if i == pattern_len {
            break;
        }
        let byte = pattern.at(i).unwrap();
        last_seen.insert(byte.into(), i + 1);
        i += 1;
    };

    // Largest window start that still fits inside the text.
    let last_start = text_len - pattern_len;
    let mut start: usize = 0;
    loop {
        if start > last_start {
            break;
        }

        // Compare right-to-left; `remaining` counts the pattern bytes that
        // have not been confirmed yet.
        let mut remaining = pattern_len;
        loop {
            if remaining == 0 {
                break;
            }
            let k = remaining - 1;
            if pattern.at(k).unwrap() != text.at(start + k).unwrap() {
                break;
            }
            remaining = k;
        };

        if remaining == 0 {
            matches.append(start);
            let after_window = start + pattern_len;
            if after_window < text_len {
                // Align the byte just past the window with its last occurrence
                // in the pattern; a rank of 0 (byte absent) skips the whole
                // window plus that byte.
                let next_byte = text.at(after_window).unwrap();
                let rank = last_seen.get(next_byte.into());
                start += pattern_len - rank + 1;
            } else {
                start += 1;
            }
        } else {
            // Mismatch at pattern index `remaining - 1`: advance by
            // max(1, remaining - rank), written so the subtraction cannot
            // underflow.
            let bad_byte = text.at(start + remaining - 1).unwrap();
            let rank = last_seen.get(bad_byte.into());
            if remaining > rank + 1 {
                start += remaining - rank;
            } else {
                start += 1;
            }
        }
    };

    matches
}
1 change: 1 addition & 0 deletions src/searching/src/lib.cairo
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod binary_search;
mod bm_search;
mod dijkstra;

#[cfg(test)]
Expand Down
1 change: 1 addition & 0 deletions src/searching/src/tests.cairo
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
mod binary_search_test;
mod bm_search_test;
mod dijkstra_test;
150 changes: 150 additions & 0 deletions src/searching/src/tests/bm_search_test.cairo
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
use alexandria_searching::bm_search::bm_search;


/// Compares two spans element by element.
/// * `a` - The first span.
/// * `b` - The second span.
/// # Returns
/// * `bool` - True when both spans have the same length and the same values
///   in the same order, false otherwise.
fn is_equal(mut a: Span<u32>, mut b: Span<u32>) -> bool {
    if a.len() != b.len() {
        return false;
    }
    loop {
        // Lengths are equal, so both spans run out at the same time.
        if a.len() == 0 {
            break true;
        }
        let left = *a.pop_front().unwrap();
        let right = *b.pop_front().unwrap();
        if left != right {
            break false;
        }
    }
}


#[test]
#[available_gas(5000000)]
fn bm_search_test_1() {
    // Text "AABCAB12AFAABCABFFEGABCAB" as ASCII bytes.
    let mut text_bytes: Array<u8> = array![
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x31, 0x32, 0x41, 0x46, 0x41, 0x41, 0x42,
        0x43, 0x41, 0x42, 0x46, 0x46, 0x45, 0x47, 0x41, 0x42, 0x43, 0x41, 0x42
    ];
    let mut text: ByteArray = Default::default();
    loop {
        match text_bytes.pop_front() {
            Option::Some(byte) => { text.append_byte(byte); },
            Option::None => { break; },
        };
    };

    // Pattern "ABCAB" as ASCII bytes.
    let mut pattern_bytes: Array<u8> = array![0x41, 0x42, 0x43, 0x41, 0x42];
    let mut pattern: ByteArray = Default::default();
    loop {
        match pattern_bytes.pop_front() {
            Option::Some(byte) => { pattern.append_byte(byte); },
            Option::None => { break; },
        };
    };

    // The pattern occurs three times in the text.
    let expected: Array<usize> = array![1, 11, 20];
    let found = bm_search(@text, @pattern);
    assert(is_equal(found.span(), expected.span()), 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_2() {
    // Text "AABCAB12AFAABCABFFEGABCAB" as ASCII bytes.
    let mut text_bytes: Array<u8> = array![
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x31, 0x32, 0x41, 0x46, 0x41, 0x41, 0x42,
        0x43, 0x41, 0x42, 0x46, 0x46, 0x45, 0x47, 0x41, 0x42, 0x43, 0x41, 0x42
    ];
    let mut text: ByteArray = Default::default();
    loop {
        match text_bytes.pop_front() {
            Option::Some(byte) => { text.append_byte(byte); },
            Option::None => { break; },
        };
    };

    // Pattern "FFF" as ASCII bytes; the text only ever contains "FF".
    let mut pattern_bytes: Array<u8> = array![0x46, 0x46, 0x46];
    let mut pattern: ByteArray = Default::default();
    loop {
        match pattern_bytes.pop_front() {
            Option::Some(byte) => { pattern.append_byte(byte); },
            Option::None => { break; },
        };
    };

    // No occurrence is expected.
    let expected: Array<usize> = array![];
    let found = bm_search(@text, @pattern);
    assert(is_equal(found.span(), expected.span()), 'invalid result');
}

#[test]
#[available_gas(5000000)]
fn bm_search_test_3() {
    // Text "AABCAB12AFAABCABFFEGABCAB" as ASCII bytes.
    let mut text_bytes: Array<u8> = array![
        0x41, 0x41, 0x42, 0x43, 0x41, 0x42, 0x31, 0x32, 0x41, 0x46, 0x41, 0x41, 0x42,
        0x43, 0x41, 0x42, 0x46, 0x46, 0x45, 0x47, 0x41, 0x42, 0x43, 0x41, 0x42
    ];
    let mut text: ByteArray = Default::default();
    loop {
        match text_bytes.pop_front() {
            Option::Some(byte) => { text.append_byte(byte); },
            Option::None => { break; },
        };
    };

    // Pattern "CAB" as ASCII bytes.
    let mut pattern_bytes: Array<u8> = array![0x43, 0x41, 0x42];
    let mut pattern: ByteArray = Default::default();
    loop {
        match pattern_bytes.pop_front() {
            Option::Some(byte) => { pattern.append_byte(byte); },
            Option::None => { break; },
        };
    };

    // The pattern occurs three times in the text.
    let expected: Array<usize> = array![3, 13, 22];
    let found = bm_search(@text, @pattern);
    assert(is_equal(found.span(), expected.span()), 'invalid result');
}

0 comments on commit 5c11922

Please sign in to comment.