-
Notifications
You must be signed in to change notification settings - Fork 0
/
karp_rabin_hash.c
56 lines (52 loc) · 1.79 KB
/
karp_rabin_hash.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#include "karp_rabin_hash.h"
int int_pow(int base, int exp);
int c2i(char c) {
return tolower(c) - 'a';
}
/**
* Creates a generator of hashes for k-grams.
* */
hash_t create_hash_generator(hash_gen_t** generator, int k, long prime, char* first_kgram){
*generator = (hash_gen_t*) malloc(sizeof(struct _hash_gen_t));
int h = 1;
(*generator)->prime = prime;
(*generator)->kgram_len = k;
//Loop for calculating h, the common base-factor
for(int i = 0; i < k - 1; i++){
h = (h * ALPHA) % prime;
}
(*generator)->h = h;
hash_t first_hash = 0;
//Generating the first k-gram
for(int i = 0; i < k; i++){
first_hash += (c2i(first_kgram[i]) + 1) * int_pow(ALPHA, k - i - 1);
}
(*generator)->curr_hash = first_hash;
(*generator)->curr_kgram = first_kgram;
return first_hash;
}
/**
* Generate the next hash for k-gram based on the previous hash (BLOCKCHAIN?)
* */
hash_t generate_next_hash(hash_gen_t* generator, char* new_kgram){
//Using the current k-gram hash to generate the next k-gram hash
hash_t new_hash = generator->curr_hash - ((c2i(generator->curr_kgram[0]) + 1) * int_pow(ALPHA, generator->kgram_len -1));
new_hash *= ALPHA;
new_hash += c2i(new_kgram[generator->kgram_len-1]) + 1;
// printf("curr_kgram: %s, new_kgram: %s, curr_hash: %d, new_hash: %d, thing: %d\n", generator->curr_kgram, new_kgram, generator->curr_hash, new_hash, ((generator->curr_kgram[0] - 'a' + 1) * int_pow(ALPHA, generator->kgram_len -1)));
generator->curr_hash = new_hash;
generator->curr_kgram = new_kgram;
return new_hash % generator->prime;
}
int int_pow(int base, int exp)
{
int result = 1;
while (exp)
{
if (exp & 1)
result *= base;
exp /= 2;
base *= base;
}
return result;
}