Skip to content

Commit

Permalink
Merge pull request #106 from andreas/andreas/serializers
Browse files Browse the repository at this point in the history
Support native serialization format
  • Loading branch information
saulius authored Jun 28, 2023
2 parents 0eff1fc + 3138209 commit d910cdc
Show file tree
Hide file tree
Showing 13 changed files with 334 additions and 191 deletions.
16 changes: 8 additions & 8 deletions croaring/benches/benches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

extern crate test;

use croaring::Bitmap;
use croaring::{Bitmap, Portable};
use test::Bencher;

#[bench]
Expand Down Expand Up @@ -314,7 +314,7 @@ fn bench_get_serialized_size_in_bytes(b: &mut Bencher) {
bitmap.add(3);

b.iter(|| {
bitmap.get_serialized_size_in_bytes();
bitmap.get_serialized_size_in_bytes::<Portable>();
});
}

Expand Down Expand Up @@ -348,7 +348,7 @@ fn bench_serialize_100000(b: &mut Bencher) {
let bitmap: Bitmap = (1..100000).collect();

b.iter(|| {
bitmap.serialize();
bitmap.serialize::<Portable>();
});
}

Expand All @@ -357,26 +357,26 @@ fn bench_serialize_1000000(b: &mut Bencher) {
let bitmap: Bitmap = (1..1000000).collect();

b.iter(|| {
bitmap.serialize();
bitmap.serialize::<Portable>();
});
}

#[bench]
fn bench_deserialize_100000(b: &mut Bencher) {
let bitmap: Bitmap = (1..100000).collect();
let serialized_buffer = bitmap.serialize();
let serialized_buffer = bitmap.serialize::<Portable>();

b.iter(|| {
Bitmap::deserialize(&serialized_buffer);
Bitmap::deserialize::<Portable>(&serialized_buffer);
});
}

#[bench]
fn bench_deserialize_1000000(b: &mut Bencher) {
let bitmap: Bitmap = (1..1000000).collect();
let serialized_buffer = bitmap.serialize();
let serialized_buffer = bitmap.serialize::<Portable>();

b.iter(|| {
Bitmap::deserialize(&serialized_buffer);
Bitmap::deserialize::<Portable>(&serialized_buffer);
});
}
127 changes: 29 additions & 98 deletions croaring/src/bitmap/imp.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use crate::Bitset;
use ffi::roaring_bitmap_t;
use std::convert::TryInto;
use std::ffi::c_char;
use std::mem;
use std::ops::{Bound, RangeBounds};

use super::serialization::{Deserializer, Serializer};
use super::{Bitmap, Statistics};

impl Bitmap {
#[inline]
#[allow(clippy::assertions_on_constants)]
unsafe fn take_heap(p: *mut roaring_bitmap_t) -> Self {
pub(crate) unsafe fn take_heap(p: *mut roaring_bitmap_t) -> Self {
// Based heavily on the `roaring.hh` cpp header from croaring

assert!(!p.is_null());
Expand Down Expand Up @@ -734,161 +734,92 @@ impl Bitmap {
buffer
}

/// Computes the serialized size in bytes of the Bitmap.
/// Computes the serialized size in bytes of the Bitmap in format `S`.
#[inline]
#[doc(alias = "roaring_bitmap_portable_size_in_bytes")]
pub fn get_serialized_size_in_bytes(&self) -> usize {
unsafe { ffi::roaring_bitmap_portable_size_in_bytes(&self.bitmap) }
pub fn get_serialized_size_in_bytes<S: Serializer>(&self) -> usize {
S::get_serialized_size_in_bytes(&self)
}

/// Computes the serialized size in bytes of the Bitmap for the frozen format.
#[inline]
#[doc(alias = "roaring_bitmap_frozen_size_in_bytes")]
pub fn get_frozen_serialized_size_in_bytes(&self) -> usize {
unsafe { ffi::roaring_bitmap_frozen_size_in_bytes(&self.bitmap) }
}

/// Serializes a bitmap to a slice of bytes.
/// Serializes a bitmap to a slice of bytes in format `S`.
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use croaring::{Bitmap, Portable};
///
/// let original_bitmap: Bitmap = (1..5).collect();
///
/// let serialized_buffer = original_bitmap.serialize();
/// let serialized_buffer = original_bitmap.serialize::<Portable>();
///
/// let deserialized_bitmap = Bitmap::deserialize(&serialized_buffer);
/// let deserialized_bitmap = Bitmap::deserialize::<Portable>(&serialized_buffer);
///
/// assert_eq!(original_bitmap, deserialized_bitmap);
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_portable_serialize")]
pub fn serialize(&self) -> Vec<u8> {
pub fn serialize<S: Serializer>(&self) -> Vec<u8> {
let mut dst = Vec::new();
self.serialize_into(&mut dst);
self.serialize_into::<S>(&mut dst);
dst
}

/// Serializes a bitmap to a slice of bytes, re-using existing capacity
/// Serializes a bitmap to a slice of bytes in format `S`, re-using existing capacity
///
/// `dst` is not cleared; the serialized data is appended after any existing data. Returns the added slice of `dst`.
/// If `dst` is empty, it is guaranteed to hold only the serialized data after this call.
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use croaring::{Bitmap, Portable};
///
/// let original_bitmap_1: Bitmap = (1..5).collect();
/// let original_bitmap_2: Bitmap = (1..10).collect();
///
/// let mut data = Vec::new();
/// for bitmap in [original_bitmap_1, original_bitmap_2] {
/// data.clear();
/// bitmap.serialize_into(&mut data);
/// bitmap.serialize_into::<Portable>(&mut data);
/// // do something with data
/// }
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_portable_serialize")]
pub fn serialize_into<'a>(&self, dst: &'a mut Vec<u8>) -> &'a [u8] {
let len = self.get_serialized_size_in_bytes();

dst.reserve(len);
let total_len = dst.len().checked_add(len).unwrap();

unsafe {
ffi::roaring_bitmap_portable_serialize(
&self.bitmap,
dst.spare_capacity_mut().as_mut_ptr().cast::<c_char>(),
);
dst.set_len(total_len);
}

dst
}

/// Serialize into the "frozen" format
///
/// This has an odd API because it always returns a slice which is aligned to 32 bytes:
/// This means the returned slice may not start exactly at the beginning of the passed Vec
#[doc(alias = "roaring_bitmap_frozen_serialize")]
pub fn serialize_frozen_into<'a>(&self, dst: &'a mut Vec<u8>) -> &'a [u8] {
const REQUIRED_ALIGNMENT: usize = 32;
let len = self.get_frozen_serialized_size_in_bytes();

let offset = dst.len();
// Need to be able to add up to 31 extra bytes to align to 32 bytes
dst.reserve(len.checked_add(REQUIRED_ALIGNMENT - 1).unwrap());

let extra_offset = match (dst.as_ptr() as usize) % REQUIRED_ALIGNMENT {
0 => 0,
r => REQUIRED_ALIGNMENT - r,
};
let offset = offset.checked_add(extra_offset).unwrap();
let total_len = offset.checked_add(len).unwrap();
debug_assert!(dst.capacity() >= total_len);

// we must initialize up to offset
dst.resize(offset, 0);

unsafe {
ffi::roaring_bitmap_frozen_serialize(
&self.bitmap,
dst.as_mut_ptr().add(offset).cast::<c_char>(),
);
dst.set_len(total_len);
}

&dst[offset..total_len]
pub fn serialize_into<'a, S: Serializer>(&self, dst: &'a mut Vec<u8>) -> &'a [u8] {
S::serialize_into(self, dst)
}

/// Given a serialized bitmap as slice of bytes returns a bitmap instance.
/// Given a serialized bitmap as a slice of bytes in format `D`, returns a `Bitmap` instance.
/// See example of [`Self::serialize`] function.
///
/// On invalid input returns None.
///
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use croaring::{Bitmap, Portable};
///
/// let original_bitmap: Bitmap = (1..5).collect();
/// let serialized_buffer = original_bitmap.serialize();
/// let serialized_buffer = original_bitmap.serialize::<Portable>();
///
/// let deserialized_bitmap = Bitmap::try_deserialize(&serialized_buffer);
/// let deserialized_bitmap = Bitmap::try_deserialize::<Portable>(&serialized_buffer);
/// assert_eq!(original_bitmap, deserialized_bitmap.unwrap());
///
/// let invalid_buffer: Vec<u8> = vec![3];
/// let deserialized_bitmap = Bitmap::try_deserialize(&invalid_buffer);
/// let deserialized_bitmap = Bitmap::try_deserialize::<Portable>(&invalid_buffer);
/// assert!(deserialized_bitmap.is_none());
/// ```
#[inline]
#[doc(alias = "roaring_bitmap_portable_deserialize_safe")]
pub fn try_deserialize(buffer: &[u8]) -> Option<Self> {
unsafe {
let bitmap = ffi::roaring_bitmap_portable_deserialize_safe(
buffer.as_ptr() as *const c_char,
buffer.len(),
);

if !bitmap.is_null() {
Some(Self::take_heap(bitmap))
} else {
None
}
}
pub fn try_deserialize<D: Deserializer>(buffer: &[u8]) -> Option<Self> {
D::try_deserialize(buffer)
}

/// Given a serialized bitmap as slice of bytes returns a bitmap instance.
/// Given a serialized bitmap as a slice of bytes in format `D`, returns a bitmap instance.
/// See example of [`Self::serialize`] function.
///
/// On invalid input returns empty bitmap.
#[inline]
pub fn deserialize(buffer: &[u8]) -> Self {
Self::try_deserialize(buffer).unwrap_or_else(Bitmap::create)
pub fn deserialize<D: Deserializer>(buffer: &[u8]) -> Self {
Self::try_deserialize::<D>(buffer).unwrap_or_else(Bitmap::create)
}

/// Creates a new bitmap from a slice of u32 integers
Expand Down Expand Up @@ -1029,14 +960,14 @@ impl Bitmap {
/// # Examples
///
/// ```
/// use croaring::Bitmap;
/// use croaring::{Bitmap, Portable};
///
/// let mut bitmap: Bitmap = (100..1000).collect();
///
/// assert_eq!(bitmap.cardinality(), 900);
/// let old_size = bitmap.get_serialized_size_in_bytes();
/// let old_size = bitmap.get_serialized_size_in_bytes::<Portable>();
/// assert!(bitmap.run_optimize());
/// let new_size = bitmap.get_serialized_size_in_bytes();
/// let new_size = bitmap.get_serialized_size_in_bytes::<Portable>();
/// assert!(new_size < old_size);
/// ```
#[inline]
Expand Down
2 changes: 2 additions & 0 deletions croaring/src/bitmap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ mod imp;
mod iter;
mod lazy;
mod ops;
mod serialization;
mod view;

pub use self::iter::BitmapIterator;
pub use self::lazy::LazyBitmap;
pub use self::serialization::{Frozen, Native, Portable};
Loading

0 comments on commit d910cdc

Please sign in to comment.