Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: binary search tree for attributes #9

Merged
merged 7 commits into from
Feb 11, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/fbs/header.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,17 @@ struct GeographicalExtent {
max: Vector;
}

// Element type of the `attribute_index` vector declared in the `Header` table.
// NOTE(review): the meaning of the fields is not documented in this schema —
// presumably `index` identifies the indexed attribute column and `length` is the
// size of that attribute's serialized index; confirm against the writer.
struct AttributeIndex {
index: ushort; // 16-bit unsigned
length: uint; // 32-bit unsigned
}

table Header {
transform: Transform; // Transformation vectors
columns: [Column]; // Attribute columns schema (can be omitted if per feature schema)
features_count: ulong; // Number of features in the dataset (0 = unknown)
index_node_size: ushort = 16; // Index node size (0 = no index)

attribute_index: [AttributeIndex];
// metadata
geographical_extent: GeographicalExtent; // Bounds
reference_system: ReferenceSystem; // Spatial Reference System
Expand Down
6 changes: 5 additions & 1 deletion src/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"

[workspace]
members = ["cli", "fcb_core", "packed_rtree", "wasm"]
members = ["cli", "fcb_core", "packed_rtree", "wasm", "bst"]
resolver = "2"

[workspace.dependencies]
Expand All @@ -28,6 +28,10 @@ pretty_assertions = "1.4.1"
tokio = { version = "1.43.0", features = ["macros", "rt-multi-thread"] }
rand = "0.8.5"
log = "0.4"
bincode = "1.3.3"
chrono = "0.4"
ordered-float = "4.6.0"
once_cell = "1.20.0"

#---WASM dependencies---
getrandom = { version = "0.2.15", features = ["js"] }
Expand Down
12 changes: 12 additions & 0 deletions src/rust/bst/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "bst"
version = "0.1.0"
edition = "2021"

[dependencies]
bincode = { workspace = true }
serde = { workspace = true }
anyhow = { workspace = true }
chrono = { workspace = true }
ordered-float = { workspace = true }
once_cell = { workspace = true }
245 changes: 245 additions & 0 deletions src/rust/bst/src/byte_serializable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
use chrono::{DateTime, Datelike, NaiveDate, NaiveDateTime, Utc};
use ordered_float::OrderedFloat;

/// A trait for converting types to and from a raw byte representation.
///
/// The numeric implementations in this file use fixed-width little-endian
/// encodings; `String` uses its UTF-8 bytes.
pub trait ByteSerializable {
/// Convert self into a newly allocated vector of bytes.
fn to_bytes(&self) -> Vec<u8>;

/// Construct an instance from the given bytes.
///
/// The signature is infallible, so malformed input cannot be reported as an
/// error: most implementations in this file panic when `bytes` is shorter
/// than the encoding they expect (or, for `String`, is not valid UTF-8).
fn from_bytes(bytes: &[u8]) -> Self;
}

/// A tagged value holding one of the types that has a `ByteSerializable`
/// implementation in this file.
pub enum ByteSerializableValue {
// Signed integers.
I64(i64),
I32(i32),
I16(i16),
I8(i8),
// Unsigned integers.
U64(u64),
U32(u32),
U16(u16),
U8(u8),
// Floats are wrapped in `OrderedFloat`, which (unlike bare f64/f32) implements `Ord`.
F64(OrderedFloat<f64>),
F32(OrderedFloat<f32>),
Bool(bool),
String(String),
// chrono date/time types.
NaiveDateTime(NaiveDateTime),
NaiveDate(NaiveDate),
DateTime(DateTime<Utc>),
}

impl ByteSerializable for i64 {
fn to_bytes(&self) -> Vec<u8> {
self.to_le_bytes().to_vec()
}
fn from_bytes(bytes: &[u8]) -> Self {
let mut array = [0u8; 8];
array.copy_from_slice(&bytes[0..8]);
i64::from_le_bytes(array)
}
}

/// `i32` is stored as 4 little-endian bytes.
impl ByteSerializable for i32 {
    /// Returns the 4 little-endian bytes of the value.
    fn to_bytes(&self) -> Vec<u8> {
        let le = self.to_le_bytes();
        le.to_vec()
    }

    /// Decodes the first 4 bytes of `bytes` as a little-endian `i32`;
    /// any trailing bytes are ignored.
    ///
    /// # Panics
    /// Panics if `bytes` holds fewer than 4 bytes.
    fn from_bytes(bytes: &[u8]) -> Self {
        // Slice first so a short input fails on the range check.
        let head = &bytes[0..4];
        i32::from_le_bytes([head[0], head[1], head[2], head[3]])
    }
}

/// `i16` is stored as 2 little-endian bytes.
impl ByteSerializable for i16 {
    /// Returns the 2 little-endian bytes of the value.
    fn to_bytes(&self) -> Vec<u8> {
        self.to_le_bytes().to_vec()
    }

    /// Decodes the first 2 bytes of `bytes` as a little-endian `i16`;
    /// any trailing bytes are ignored.
    ///
    /// # Panics
    /// Panics if `bytes` holds fewer than 2 bytes.
    fn from_bytes(bytes: &[u8]) -> Self {
        let mut array = [0u8; 2];
        // Copy the input into the buffer: decoding the zero-initialised buffer
        // directly would ignore `bytes` and always yield 0.
        array.copy_from_slice(&bytes[0..2]);
        i16::from_le_bytes(array)
    }
}

/// `i8` is stored as a single byte (two's complement).
impl ByteSerializable for i8 {
    /// Returns a one-element vector holding the value's byte pattern.
    fn to_bytes(&self) -> Vec<u8> {
        self.to_le_bytes().to_vec()
    }

    /// Reinterprets the first byte of `bytes` as an `i8`.
    ///
    /// # Panics
    /// Panics if `bytes` is empty.
    fn from_bytes(bytes: &[u8]) -> Self {
        i8::from_le_bytes([bytes[0]])
    }
}
/// `u64` is stored as 8 little-endian bytes.
impl ByteSerializable for u64 {
    /// Returns the 8 little-endian bytes of the value.
    fn to_bytes(&self) -> Vec<u8> {
        Vec::from(self.to_le_bytes())
    }

    /// Decodes the first 8 bytes of `bytes` as a little-endian `u64`;
    /// any trailing bytes are ignored.
    ///
    /// # Panics
    /// Panics if `bytes` holds fewer than 8 bytes.
    fn from_bytes(bytes: &[u8]) -> Self {
        // Little endian: the last byte is the most significant, so fold from
        // the back, shifting the accumulator up one byte per step.
        bytes[0..8]
            .iter()
            .rev()
            .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
    }
}
impl ByteSerializable for u32 {
fn to_bytes(&self) -> Vec<u8> {
self.to_le_bytes().to_vec()
}
fn from_bytes(bytes: &[u8]) -> Self {
let mut array = [0u8; 4];
array.copy_from_slice(&bytes[0..4]);
u32::from_le_bytes(array)
}
}

/// `u16` is stored as 2 little-endian bytes.
impl ByteSerializable for u16 {
    /// Returns the 2 little-endian bytes of the value.
    fn to_bytes(&self) -> Vec<u8> {
        let le = self.to_le_bytes();
        le.to_vec()
    }

    /// Decodes the first 2 bytes of `bytes` as a little-endian `u16`;
    /// any trailing bytes are ignored.
    ///
    /// # Panics
    /// Panics if `bytes` holds fewer than 2 bytes.
    fn from_bytes(bytes: &[u8]) -> Self {
        // Slice first so a short input fails on the range check.
        let head = &bytes[0..2];
        u16::from_le_bytes([head[0], head[1]])
    }
}

/// `u8` is stored as itself: a single byte.
impl ByteSerializable for u8 {
    /// Returns a one-element vector containing the value.
    fn to_bytes(&self) -> Vec<u8> {
        self.to_le_bytes().to_vec()
    }

    /// Returns the first byte of `bytes`.
    ///
    /// # Panics
    /// Panics if `bytes` is empty.
    fn from_bytes(bytes: &[u8]) -> Self {
        u8::from_le_bytes([bytes[0]])
    }
}

/// `String` is stored as its raw UTF-8 bytes, with no length prefix or terminator.
impl ByteSerializable for String {
    /// Returns a copy of the string's UTF-8 bytes.
    fn to_bytes(&self) -> Vec<u8> {
        self.clone().into_bytes()
    }

    /// Interprets the whole of `bytes` as UTF-8 text.
    ///
    /// # Panics
    /// Panics if `bytes` is not valid UTF-8.
    fn from_bytes(bytes: &[u8]) -> Self {
        let owned = bytes.to_owned();
        String::from_utf8(owned).unwrap()
    }
}

impl ByteSerializable for f64 {
fn to_bytes(&self) -> Vec<u8> {
self.to_le_bytes().to_vec()
}
fn from_bytes(bytes: &[u8]) -> Self {
let mut array = [0u8; 8];
array.copy_from_slice(&bytes[0..8]);
f64::from_le_bytes(array)
}
}

/// `f32` is stored as the 4 little-endian bytes of its IEEE-754 bit pattern.
impl ByteSerializable for f32 {
    /// Returns the 4 little-endian bytes of the value's bit pattern.
    fn to_bytes(&self) -> Vec<u8> {
        self.to_bits().to_le_bytes().to_vec()
    }

    /// Decodes the first 4 bytes of `bytes` as a little-endian `f32`;
    /// any trailing bytes are ignored.
    ///
    /// # Panics
    /// Panics if `bytes` holds fewer than 4 bytes.
    fn from_bytes(bytes: &[u8]) -> Self {
        // Slice first so a short input fails on the range check.
        let head = &bytes[0..4];
        let bits = u32::from_le_bytes([head[0], head[1], head[2], head[3]]);
        f32::from_bits(bits)
    }
}

// Implement ByteSerializable for OrderedFloat<f64> because f64 doesn't implement Ord trait because of NaN values.
impl ByteSerializable for OrderedFloat<f64> {
fn to_bytes(&self) -> Vec<u8> {
self.0.to_le_bytes().to_vec()
}

fn from_bytes(bytes: &[u8]) -> Self {
let mut array = [0u8; 8];
array.copy_from_slice(&bytes[0..8]);
OrderedFloat(f64::from_le_bytes(array))
}
}

// Implement ByteSerializable for OrderedFloat<f64> because f64 doesn't implement Ord trait because of NaN values.
impl ByteSerializable for OrderedFloat<f32> {
fn to_bytes(&self) -> Vec<u8> {
self.0.to_le_bytes().to_vec()
}
fn from_bytes(bytes: &[u8]) -> Self {
let mut array = [0u8; 4];
array.copy_from_slice(&bytes[0..4]);
OrderedFloat(f32::from_le_bytes(array))
}
}

/// `bool` is stored as a single byte: 1 for `true`, 0 for `false`.
impl ByteSerializable for bool {
    /// Returns `[1]` for `true` and `[0]` for `false`.
    fn to_bytes(&self) -> Vec<u8> {
        vec![u8::from(*self)]
    }

    /// Treats any non-zero first byte as `true`; empty input decodes to `false`.
    fn from_bytes(bytes: &[u8]) -> Self {
        match bytes.first() {
            Some(&flag) => flag != 0,
            None => false,
        }
    }
}

/// We serialize a NaiveDateTime as 12 bytes:
/// - 8 bytes for the timestamp (seconds since epoch, as i64, little endian)
/// - 4 bytes for the nanosecond part (u32, little endian)
impl ByteSerializable for NaiveDateTime {
fn to_bytes(&self) -> Vec<u8> {
let mut bytes = self.and_utc().timestamp().to_le_bytes().to_vec();
bytes.extend(&self.and_utc().timestamp_subsec_nanos().to_le_bytes());
bytes
}

fn from_bytes(bytes: &[u8]) -> Self {
// Ensure there are at least 12 bytes.
assert!(bytes.len() >= 12, "Not enough bytes for NaiveDateTime");
let mut ts_bytes = [0u8; 8];
ts_bytes.copy_from_slice(&bytes[0..8]);
let timestamp = i64::from_le_bytes(ts_bytes);

let mut nano_bytes = [0u8; 4];
nano_bytes.copy_from_slice(&bytes[8..12]);
let nanosecond = u32::from_le_bytes(nano_bytes);

NaiveDateTime::from_timestamp(timestamp, nanosecond)
}
}

/// We serialize a NaiveDate as 12 bytes:
/// - 4 bytes for the year (i32, little endian)
/// - 4 bytes for the month (u32, little endian)
/// - 4 bytes for the day (u32, little endian)
///
/// The field widths follow the integer types returned by `year()`, `month()`
/// and `day()`; `from_bytes` reads the fields back at the same offsets.
impl ByteSerializable for NaiveDate {
    /// Encodes year, month and day, in that order, as three 4-byte fields.
    fn to_bytes(&self) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(12);
        bytes.extend_from_slice(&self.year().to_le_bytes());
        bytes.extend_from_slice(&self.month().to_le_bytes());
        bytes.extend_from_slice(&self.day().to_le_bytes());
        bytes
    }

    /// Decodes the first 12 bytes of `bytes`; any trailing bytes are ignored.
    ///
    /// # Panics
    /// Panics if `bytes` holds fewer than 12 bytes, or if the decoded
    /// year/month/day do not form a valid calendar date.
    fn from_bytes(bytes: &[u8]) -> Self {
        // Same up-front length check as the NaiveDateTime implementation.
        assert!(bytes.len() >= 12, "Not enough bytes for NaiveDate");

        let mut y = [0u8; 4];
        let mut m = [0u8; 4];
        let mut d = [0u8; 4];
        y.copy_from_slice(&bytes[0..4]);
        m.copy_from_slice(&bytes[4..8]);
        d.copy_from_slice(&bytes[8..12]);

        NaiveDate::from_ymd_opt(
            i32::from_le_bytes(y),
            u32::from_le_bytes(m),
            u32::from_le_bytes(d),
        )
        .expect("decoded year/month/day is not a valid calendar date")
    }
}

/// Since DateTime<Utc> is essentially a NaiveDateTime with an offset,
/// we delegate the conversion to the NaiveDateTime implementation, so the
/// byte layout is whatever that implementation produces.
impl ByteSerializable for DateTime<Utc> {
    /// Encodes the UTC date-time through the `NaiveDateTime` implementation.
    fn to_bytes(&self) -> Vec<u8> {
        self.naive_utc().to_bytes()
    }

    /// Decodes a `NaiveDateTime` from `bytes` and tags it as UTC.
    ///
    /// # Panics
    /// Panics whenever the `NaiveDateTime` implementation of `from_bytes` panics.
    fn from_bytes(bytes: &[u8]) -> Self {
        let naive = <NaiveDateTime as ByteSerializable>::from_bytes(bytes);
        // `and_utc` replaces the deprecated `DateTime::<Utc>::from_utc(naive, Utc)`
        // and is the same conversion `NaiveDateTime::to_bytes` already relies on.
        naive.and_utc()
    }
}
Loading