Skip to content

Commit

Permalink
hash.h, hash.tpp, cauchy.h
Browse files Browse the repository at this point in the history
- Buckets must be a power of 2, so that we can use & instead of the modulo operator (so it's faster).
- Added Hash_Tab for 8bits and 128bits
- Moved the construction of the cauchy matrix to cauchy.h

mersenne.h
- Defined some new prime spaces

network_sketches.py
- Added a test function

py_agmssketch.h
- Added cw2 as a possible random generator
- Added AGMS8

test_AGMS.py
- Some tests to test the bounds of the AGMS sketch
  • Loading branch information
esterl committed Jul 26, 2014
1 parent a6cb5de commit eb64daf
Show file tree
Hide file tree
Showing 13 changed files with 728 additions and 280 deletions.
108 changes: 108 additions & 0 deletions cauchy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#ifndef SKETCHES_CAUCHY_H
#define SKETCHES_CAUCHY_H

// Builds the multiplication table of n in the prime field: entry i holds
// (n * i) mod prime, for i = 0 .. rows-1.
//
//   n     - the constant factor
//   rows  - number of entries in the table
//   prime - the modulus; must be non-zero
//
// Returns a new[]-allocated array of `rows` entries. The caller owns it and
// has to release it with delete [].
inline uint32_t* multiplicationTable(uint32_t n, uint32_t rows, uint32_t prime){
    uint32_t *result = new uint32_t[rows];

    // Widen before multiplying: in 32 bits n*i silently wraps as soon as the
    // product reaches 2^32, which would store the wrong residue.
    const uint64_t factor = n;
    for (uint32_t i=0; i<rows; i++){
        result[i] = static_cast<uint32_t>((factor * i) % prime);
    }
    return result;
}

// Modular inverse of a in the field Z_prime, computed as a^(prime-2) mod prime
// (Fermat's little theorem). Requires `prime` to be prime and `a` not to be a
// multiple of it; for a multiple of `prime` the result is 0.
inline uint32_t cauchyInverse(uint32_t a, uint32_t prime){
    uint64_t base = a % prime;
    uint64_t result = 1;
    // Square-and-multiply; every product of two residues fits in 64 bits.
    for (uint32_t e = prime - 2; e != 0; e >>= 1){
        if (e & 1)
            result = (result * base) % prime;
        base = (base * base) % prime;
    }
    return static_cast<uint32_t>(result);
}

// Creates the multiplication tables for a Cauchy matrix of size q*(q-1).
//
// Table k (k = 0 .. 2*(q-1)-1) is the multiplication table, with `rows`
// entries, of the inverse of k+1 modulo the prime 2^16 + 1.
//
// The inverses are computed instead of being read from a fixed 14-entry
// array, so q is no longer limited to 8 (a larger q used to read past the end
// of that array). q == 0 yields an empty set of tables instead of letting
// 2*(q-1) wrap around. 2*(q-1) must stay below the prime for every inverse to
// exist.
//
// Returns a new[]-allocated array of 2*(q-1) new[]-allocated tables; the
// caller owns all of them.
inline uint32_t** cauchyTables(uint32_t rows, unsigned q){
    const uint32_t prime = (1 << 16) + 1;
    const unsigned count = (q == 0) ? 0 : 2*(q-1);

    uint32_t **tables = new uint32_t*[count];
    for (unsigned i=0; i < count; i++){
        tables[i] = multiplicationTable(cauchyInverse(i+1, prime), rows, prime);
    }

    return tables;
}

// Packs three component tables into a single one: entry i of the result is
// c0[i] + c1[i]*2^21 + c2[i]*2^42, i.e. every component gets a 21-bit slot of
// a uint64_t. The slots are combined by plain addition, so a component wider
// than 21 bits carries into the next slot.
// Returns a new[]-allocated array of `rows` entries owned by the caller.
inline uint64_t* compressRow(uint32_t rows, uint32_t* c0, uint32_t* c1, uint32_t* c2){
    uint64_t* const packed = new uint64_t[rows];

    for (uint32_t idx = 0; idx != rows; ++idx){
        const uint64_t low  = c0[idx];
        const uint64_t mid  = static_cast<uint64_t>(c1[idx]) << 21;
        const uint64_t high = static_cast<uint64_t>(c2[idx]) << 42;
        packed[idx] = low + mid + high;
    }
    return packed;
}

// Packs two component tables into a single one: entry i of the result is
// c0[i] + c1[i]*2^21, i.e. c0 sits in the low 21-bit slot of a uint64_t and
// c1 in the slot above it. The slots are combined by plain addition, so a c0
// value wider than 21 bits carries into the c1 slot.
// Returns a new[]-allocated array of `rows` entries owned by the caller.
inline uint64_t* compressRow(uint32_t rows, uint32_t* c0, uint32_t* c1){
    uint64_t* const packed = new uint64_t[rows];

    for (uint32_t idx = 0; idx != rows; ++idx){
        const uint64_t low  = c0[idx];
        const uint64_t high = static_cast<uint64_t>(c1[idx]) << 21;
        packed[idx] = low + high;
    }
    return packed;
}

// Builds the compressed Cauchy matrix for q = 4: four rows, where row r packs
// the three consecutive multiplication tables tables[r], tables[r+1] and
// tables[r+2] into one uint64_t array of `rows` entries.
// Returns a new[]-allocated array of 4 new[]-allocated rows; the caller owns
// all of them.
inline uint64_t** GenerateCauchy4(uint32_t rows){
    const unsigned q = 4;
    const unsigned numTables = 2*(q-1);
    uint32_t** tables = cauchyTables(rows, 4);

    uint64_t** matrix = new uint64_t*[4];
    for (unsigned r = 0; r < q; ++r){
        matrix[r] = compressRow(rows, tables[r], tables[r+1], tables[r+2]);
    }

    // The uncompressed tables are only needed while packing the rows.
    for (unsigned t = 0; t < numTables; ++t){
        delete [] tables[t];
    }
    delete [] tables;
    return matrix;
}

// One row of the compressed Cauchy matrix built by GenerateCauchy8: three
// arrays of packed entries. The struct only holds the pointers; it neither
// allocates nor releases the arrays.
struct CauchyRow{
    uint64_t *v0;
    uint64_t *v1;
    uint64_t *v2;
};

// Generates a compressed Cauchy matrix of 8 rows and 7 compressed columns.
//
// Each row is a CauchyRow of three packed arrays with `rows` entries each:
//   v0 packs tables[i],   tables[i+1], tables[i+2]
//   v1 packs tables[i+3], tables[i+4]  (plus tables[i+5] where it is shared)
//   v2 packs tables[i+5], tables[i+6]  (plus tables[i+7] where it is shared)
// where tables[] are the 14 multiplication tables returned by
// cauchyTables(rows, 8).
//
// NOTE: rows share arrays. v1 of rows 0..4 is the very same pointer as v0 of
// rows 3..7, and v2 of rows 0..5 is the very same pointer as v1 of rows 2..7.
// Only 13 distinct arrays are allocated (8 for v0, 3 for v1, 2 for v2), so
// code that releases the matrix must not blindly delete [] every v0/v1/v2 of
// every row, or shared arrays get freed more than once.
//
// Returns a new[]-allocated array of 8 CauchyRow owned by the caller.
inline CauchyRow* GenerateCauchy8(uint32_t rows){
unsigned q = 8;
uint32_t** tables = cauchyTables(rows, 8);

CauchyRow* matrix = new CauchyRow[8];

// Step 1: every row gets its own three-component v0.
for (unsigned i=0; i<q; i++)
matrix[i].v0 = compressRow(rows, tables[i], tables[i+1], tables[i+2]);

// Step 2: v1 starts three tables further on. Where a row j = i+3 exists, its
// v0 already packs exactly those tables and is reused; the last three rows
// get a fresh two-component array. Must run after step 1.
for (unsigned i=0; i<q; i++)
{
unsigned j = i+3;
if (j<q)
matrix[i].v1 = matrix[j].v0;
else
matrix[i].v1 = compressRow(rows, tables[j], tables[j+1]);
}

// Step 3: v2 starts five tables further on, which is where v1 of row i+2
// starts, so that array is reused. Must run after step 2.
for (unsigned i=0; i<q-2; i++)
{
matrix[i].v2 = matrix[i+2].v1;
}
// The last two rows have no row i+2 to borrow from and get fresh
// two-component arrays (tables 11,12 and 12,13).
matrix[q-2].v2 = compressRow(rows, tables[q-2+5], tables[q-2+6]);
matrix[q-1].v2 = compressRow(rows, tables[q-1+5], tables[q-1+6]);

// The uncompressed tables are only needed while packing the rows.
for (unsigned i=0; i< 2*(q-1); i++){
delete [] tables[i];
}
delete [] tables;

return matrix;
}
#endif // SKETCHES_CAUCHY_H
100 changes: 62 additions & 38 deletions hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
#include <stdint.h>

#include "mersenne.h"
#include "cauchy.h"
/* Generating schemes for different types of {B} random variables (or hashes to
the output space B={0..B-1}). B should be representable with an unsigned int.
the output space B={0..B-1}). B should be representable with an unsigned.
Based on the implementation by F. Rusu at:
http://www.cise.ufl.edu/~frusu/code.html
More details can be found on:
Expand All @@ -20,7 +21,7 @@
operations should be defined for T:
* operator+
* operator>>
* operator unsigned int
* operator unsigned
* operator ^=
* operator &
* operator *
Expand All @@ -30,7 +31,7 @@ template<typename T>
class Hash
{
public:
virtual unsigned int element(T j) = 0;
virtual unsigned element(T j) = 0;
virtual Hash<T>* copy() {return NULL;};
virtual ~Hash(){};
};
Expand All @@ -45,14 +46,14 @@ template<typename T1, typename T2>
class Hash_CW2: Hash<T1>{
protected:
T2 seeds[2];
unsigned int num_buckets;
unsigned int mersenne_exponent;
unsigned mask;
unsigned mersenne_exponent;

public:
Hash_CW2(T2 I1, T2 I2, unsigned int B);
Hash_CW2(unsigned int B);
Hash_CW2(unsigned B, T2 seed0, T2 seed1);
Hash_CW2(unsigned B) : Hash_CW2(B, random<T2>(), random<T2>()){};

virtual unsigned int element(T1 j);
virtual unsigned element(T1 j);
virtual Hash<T1>* copy();

virtual ~Hash_CW2();
Expand All @@ -65,48 +66,66 @@ template<typename T1, typename T2>
class Hash_CW4: Hash<T1> {
protected:
T2 seeds[4];
unsigned int num_buckets;
unsigned int mersenne_exponent;
unsigned mask;
unsigned mersenne_exponent;

public:
Hash_CW4(T2 I1, T2 I2, T2 I3, T2 I4, unsigned int B);
Hash_CW4(unsigned int B);
Hash_CW4(unsigned B, T2 seed0, T2 seed1, T2 seed2, T2 seed3);
Hash_CW4(unsigned B) : Hash_CW4(B, random<T2>(), random<T2>(),
random<T2>(), random<T2>()) {};
Hash_CW4(unsigned B, T2 *seeds) : Hash_CW4(B, seeds[0], seeds[1],
seeds[2], seeds[3]) {};

virtual unsigned int element(T1 j);
virtual unsigned element(T1 j);
virtual Hash<T1>* copy();

virtual ~Hash_CW4();
};

/* Tabulated Hashing as presented by Mikkel Thorup and Yin Zhang in "Tabulation
Based 4-Universal Hashing with Applications to Second Moment Estimation" for
generating 4-wise independent random variables from {B}.*/
generating 4-wise independent random variables from {B}. We expect B to be a
power of 2 below 2^16, so that the pre-computed tables can be kept smaller.*/
template<typename T>
class Hash_Tab: Hash<T>{
protected:
unsigned int num_buckets;

public:
Hash_Tab(unsigned B);

virtual unsigned int element(T j);
virtual unsigned element(T j);
virtual Hash<T>* copy();

virtual ~Hash_Tab();
};

/* Specialisation of Hash_Tab for 8-bit keys. Only the declarations are given
here; the member functions are defined elsewhere (hash.tpp is included at the
end of this header). */
template<>
class Hash_Tab<uint8_t>: Hash<uint8_t>{
protected:
// Lookup table used by the hash; presumably one pre-computed value per
// possible key, filled in by the constructor -- TODO confirm in hash.tpp.
uint16_t * table;

public:
// Builds the hash for B buckets from four explicit seeds.
Hash_Tab(unsigned B, prime13_t seed0, prime13_t seed1, prime13_t seed2, prime13_t seed3);
// Builds the hash for B buckets, delegating to the constructor above with
// four seeds drawn from random<prime13_t>().
Hash_Tab(unsigned B) : Hash_Tab(B, random<prime13_t>(), random<prime13_t>(),
random<prime13_t>(), random<prime13_t>()) {};
Hash_Tab();

// Hash value of key j (see Hash<T>::element).
virtual unsigned element(uint8_t j);
// Returns a copy of this hash as a Hash<uint8_t>*; NOTE(review): ownership of
// the returned object is not visible here -- confirm against hash.tpp.
virtual Hash<uint8_t>* copy();

virtual ~Hash_Tab();
};

template<>
class Hash_Tab<uint16_t>: Hash<uint16_t>{
protected:
unsigned int num_buckets;
uint16_t* table;

public:
Hash_Tab(uint64_t I1, uint64_t I2, uint64_t I3, uint64_t I4, unsigned int B);
Hash_Tab(unsigned B);
Hash_Tab(unsigned B, prime17_t s0, prime17_t s1, prime17_t s2, prime17_t s3);
Hash_Tab(unsigned B) : Hash_Tab(B, random<prime17_t>(), random<prime17_t>(),
random<prime17_t>(), random<prime17_t>()) {};
Hash_Tab();

virtual unsigned int element(uint16_t j);
virtual unsigned element(uint16_t j);
virtual Hash<uint16_t>* copy();

virtual ~Hash_Tab();
Expand All @@ -115,15 +134,15 @@ class Hash_Tab<uint16_t>: Hash<uint16_t>{
template<>
class Hash_Tab<uint32_t>: Hash<uint32_t>{
protected:
unsigned int num_buckets;
unsigned mask;
uint16_t *T0, *T1, *T2;

public:
Hash_Tab(prime61_t* seeds0, prime61_t* seeds1, prime61_t* seeds2, unsigned int B);
Hash_Tab(unsigned B);
Hash_Tab(unsigned B, prime31_t* s0, prime31_t* s1, prime31_t* s2);
Hash_Tab(unsigned B): Hash_Tab(B, NULL, NULL, NULL){};
Hash_Tab();

virtual unsigned int element(uint32_t j);
virtual unsigned element(uint32_t j);
virtual Hash<uint32_t>* copy();

virtual ~Hash_Tab();
Expand All @@ -132,37 +151,42 @@ class Hash_Tab<uint32_t>: Hash<uint32_t>{
template<>
class Hash_Tab<uint64_t>: Hash<uint64_t>{
protected:
unsigned int num_buckets;
unsigned mask;
uint16_t *T0, *T1, *T2, *T3, *T4, *T5, *T6;
uint64_t **cauchy;

uint64_t** getCauchy();

public:
Hash_Tab(prime61_t* seeds0, prime61_t* seeds1, prime61_t* seeds2,
prime61_t* seeds3, prime61_t* seeds4, prime61_t* seeds5,
prime61_t* seeds6, unsigned int B);
Hash_Tab(unsigned B, prime31_t* seeds0, prime31_t* seeds1, prime31_t* seeds2,
prime31_t* seeds3, prime31_t* seeds4, prime31_t* seeds5,
prime31_t* seeds6);
Hash_Tab(unsigned B);
Hash_Tab();

virtual unsigned int element(uint64_t j);
virtual unsigned element(uint64_t j);
virtual Hash<uint64_t>* copy();

virtual ~Hash_Tab();
};

template<>
class Hash_Tab<uint128>: Hash<uint128>{
class Hash_Tab<uint128_t>: Hash<uint128_t>{
protected:
unsigned int num_buckets;
uint16_t **T;
unsigned mask;
CauchyRow* cauchy;
CauchyRow* getCauchy();

public:
Hash_Tab(unsigned B){ num_buckets = B;};
Hash_Tab(unsigned B, prime31_t** seeds=NULL);
Hash_Tab(){};

unsigned int element(uint128 j){ return 0;};
Hash<uint128>* copy(){ };
unsigned element(uint128_t j);
Hash<uint128_t>* copy();

~Hash_Tab(){};
~Hash_Tab();
};

#include "hash.tpp"

#endif // SKETCHES_HASH_H
Loading

0 comments on commit eb64daf

Please sign in to comment.