Skip to content

Commit

Permalink
Add from_buffer() method to BloomFilter C extension.
Browse files Browse the repository at this point in the history
Also fixes a bug where the bloomfilter buffer was not zeroed-out
properly during initialization.

Fixes #2056
  • Loading branch information
coleifer committed Nov 24, 2019
1 parent 08116a5 commit 1247834
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 2 deletions.
18 changes: 17 additions & 1 deletion playhouse/_sqlite_ext.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1099,7 +1099,7 @@ seeds[:] = [0, 1337, 37, 0xabcd, 0xdead, 0xface, 97, 0xed11, 0xcad9, 0x827b]
cdef bf_t *bf_create(size_t size):
    # Allocate a bf_t struct together with a zero-filled bit buffer of `size`
    # bytes. Returns NULL when the struct itself cannot be allocated.
    cdef bf_t *bf = <bf_t *>calloc(1, sizeof(bf_t))
    if bf == NULL:
        # Nothing to populate; avoid dereferencing a NULL pointer below.
        return NULL
    bf.size = size
    # Use calloc (not malloc) so the buffer starts out zeroed. A single
    # allocation also avoids leaking a buffer that would be overwritten by a
    # second assignment to `bf.bits`.
    bf.bits = calloc(1, size)
    return bf

@cython.cdivision(True)
Expand Down Expand Up @@ -1152,6 +1152,9 @@ cdef class BloomFilter(object):
if self.bf:
bf_free(self.bf)

def __len__(self):
# Report the `size` field of the underlying bf_t struct. The test-suite
# expects this to match the length of the bytes returned by to_buffer().
return self.bf.size

def add(self, *keys):
cdef bytes bkey

Expand All @@ -1171,6 +1174,19 @@ cdef class BloomFilter(object):
# embedded NULL bytes.
return buf

@classmethod
def from_buffer(cls, data):
    """
    Build a bloom filter whose bit buffer is a copy of ``data``.

    :param data: bytes object holding the raw filter contents, e.g. the
        value returned by ``to_buffer()``.
    :returns: a new instance of ``cls`` sized to ``len(data)``.
    :raises TypeError: if ``data`` is not a bytes object.
    """
    cdef:
        char *buf
        Py_ssize_t buflen
        BloomFilter bloom

    # Guard explicitly so that `buf` / `buflen` are never read uninitialized,
    # regardless of how PyBytes_AsStringAndSize reports errors here.
    if not isinstance(data, bytes):
        raise TypeError('from_buffer() requires a bytes object.')

    # `buf` points at the bytes object's own storage; it is copied below.
    PyBytes_AsStringAndSize(data, &buf, &buflen)

    # Instantiate via `cls` (not the hard-coded base class) so that calling
    # this alternate constructor on a subclass yields that subclass.
    bloom = cls(buflen)
    memcpy(bloom.bf.bits, <void *>buf, buflen)
    return bloom

@classmethod
def calculate_size(cls, double n, double p):
cdef double m = ceil((n * log(p)) / log(1.0 / (pow(2.0, log(2.0)))))
Expand Down
31 changes: 30 additions & 1 deletion tests/cysqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,11 @@ def test_bloomfilter(self):


class TestBloomFilter(BaseTestCase):
n = 1024

def setUp(self):
    """Create a fresh BloomFilter of ``self.n`` bytes for each test."""
    super(TestBloomFilter, self).setUp()
    # Size the filter from the class attribute `n` so the assertions that
    # compare against `self.n` stay in sync. Build it once only; a second,
    # hard-coded construction would simply be discarded.
    self.bf = BloomFilter(self.n)

def test_bloomfilter(self):
keys = ('charlie', 'huey', 'mickey', 'zaizee', 'nuggie', 'foo', 'bar',
Expand All @@ -392,6 +394,33 @@ def test_bloomfilter(self):
self.assertFalse(key + '-y' in self.bf)
self.assertFalse(key + ' ' in self.bf)

def test_bloomfilter_buffer(self):
    """Round-trip a populated filter through to_buffer()/from_buffer()."""
    size = self.n
    original = self.bf
    self.assertEqual(len(original), size)

    # Before any key is added the serialized form is `size` zero bytes.
    empty = original.to_buffer()
    self.assertEqual(len(empty), size)
    self.assertEqual(empty, b'\x00' * size)

    members = ('alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta')
    original.add(*members)

    def check_membership(bloom):
        # Every added key is reported present; a near-miss variant is not.
        for member in members:
            self.assertTrue(member in bloom)
            self.assertFalse(member + '-x' in bloom)

    check_membership(original)

    # Serialize, then rebuild a second filter from the raw bytes.
    serialized = original.to_buffer()
    restored = BloomFilter.from_buffer(serialized)
    check_membership(restored)

    # Both filters must have the same size and identical contents.
    self.assertEqual(len(restored), size)
    self.assertEqual(serialized, restored.to_buffer())


class DataTypes(TableFunction):
columns = ('key', 'value')
Expand Down

0 comments on commit 1247834

Please sign in to comment.