Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BSON support #142

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ regex = "1.5"
lru = "0.10"
bitvec = "1.0"
tempfile = "3.1"
bson = "2.7"

[features]
noreadlock = []
eprint_log = []
eprint_log = []
4 changes: 4 additions & 0 deletions src/structure/tfc/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,8 @@ fn record_size_decoding(enc: u8) -> Option<u8> {
0 => None,
3 => Some(4),
4 => Some(8),
5 => Some(12),
6 => Some(16),
_ => panic!("Ok, this is not known"),
}
}
Expand All @@ -760,6 +762,8 @@ fn record_size_encoding(record_size: Option<u8>) -> u8 {
None => 0,
Some(4) => 3 << 3,
Some(8) => 4 << 3,
Some(12) => 5 << 3,
Some(16) => 6 << 3,
_ => {
panic!("This is really bad!")
}
Expand Down
113 changes: 113 additions & 0 deletions src/structure/tfc/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use super::{
TypedDictEntry,
};
use base64::display::Base64Display;
use bson::Decimal128;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use chrono::{NaiveDateTime, NaiveTime};
Expand Down Expand Up @@ -59,6 +60,14 @@ pub enum Datatype {
Base64Binary,
HexBinary,
AnySimpleType,

Decimal128,
BSONObjectId,
TimeStamp64,
BSONTimeStamp,
Regex,
Javascript,
BSONBinary,
}

impl Datatype {
Expand All @@ -84,6 +93,10 @@ impl Datatype {
Datatype::BigInt => None,
Datatype::Token => None,
Datatype::LangString => None,
Datatype::Decimal128 => Some(16),
Datatype::BSONObjectId => Some(12),
Datatype::TimeStamp64 => Some(8),
Datatype::BSONTimeStamp => Some(8),
_ => None,
}
}
Expand Down Expand Up @@ -998,6 +1011,67 @@ impl TdbDataType for HexBinary {
}
}

/// A BSON ObjectId, held as its 12 raw bytes.
///
/// The lexical (stored) form is exactly these 12 bytes, in the same order.
/// The inner array is private; use the `From` conversions to move between
/// `BSONObjectId` and `[u8; 12]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct BSONObjectId([u8; 12]);

impl From<[u8; 12]> for BSONObjectId {
    /// Wraps 12 raw bytes as an ObjectId without any validation.
    fn from(bytes: [u8; 12]) -> Self {
        BSONObjectId(bytes)
    }
}

impl From<BSONObjectId> for [u8; 12] {
    /// Unwraps an ObjectId into its 12 raw bytes.
    fn from(id: BSONObjectId) -> Self {
        id.0
    }
}

impl FromLexical<BSONObjectId> for BSONObjectId {
    /// Reads the next 12 bytes of `b` as an ObjectId.
    ///
    /// # Panics
    ///
    /// Panics if `b` has fewer than 12 bytes remaining, because
    /// `Buf::copy_to_slice` requires the buffer to fill the whole slice.
    fn from_lexical<B: Buf>(mut b: B) -> Self {
        let mut result = [0; 12];
        b.copy_to_slice(&mut result);

        BSONObjectId(result)
    }
}

impl ToLexical<BSONObjectId> for BSONObjectId {
    /// Returns a copy of the 12 raw bytes.
    fn to_lexical(&self) -> Bytes {
        Bytes::copy_from_slice(&self.0)
    }
}

impl ToLexical<BSONObjectId> for [u8; 12] {
    /// Lets a plain 12-byte array be used directly as an ObjectId value,
    /// e.g. `BSONObjectId::make_entry(&[0u8; 12])`.
    fn to_lexical(&self) -> Bytes {
        Bytes::copy_from_slice(self)
    }
}

impl TdbDataType for BSONObjectId {
    fn datatype() -> Datatype {
        Datatype::BSONObjectId
    }
}

// Selects the most significant bit of the 128-bit pattern (the sign bit).
const DEC128_SIGN_MASK: u128 = 1 << 127;
// Every bit set; XOR-ing with this inverts the whole pattern.
const DEC128_COMPLEMENT: u128 = u128::MAX;

impl ToLexical<Decimal128> for Decimal128 {
    /// Encodes the decimal as 16 big-endian bytes of a transformed bit pattern.
    ///
    /// The little-endian bytes of the decimal are read as a `u128`. If the
    /// sign bit is set, every bit is inverted; otherwise only the sign bit is
    /// flipped. After the transform, inputs with the sign bit set have a
    /// cleared top bit and all other inputs have it set.
    ///
    /// NOTE(review): presumably this is meant to make unsigned bytewise
    /// comparison of the output follow numeric order. That has not been
    /// verified for values that are numerically equal but stored with
    /// different exponents (e.g. `1.0` vs `1.00`) — TODO confirm.
    fn to_lexical(&self) -> Bytes {
        let raw = u128::from_le_bytes(self.bytes());
        let sign_is_set = (raw & DEC128_SIGN_MASK) != 0;
        let mask = if sign_is_set {
            DEC128_COMPLEMENT
        } else {
            DEC128_SIGN_MASK
        };
        let encoded = (raw ^ mask).to_be_bytes();
        Bytes::copy_from_slice(&encoded)
    }
}

impl FromLexical<Decimal128> for Decimal128 {
    /// Inverse of `to_lexical`: reads a big-endian `u128` from `b` and undoes
    /// the bit transform.
    ///
    /// A set top bit in the stored form means the original had its sign bit
    /// clear, so only that bit is flipped back; otherwise all bits are
    /// inverted back.
    fn from_lexical<B: Buf>(mut b: B) -> Self {
        let stored = b.get_u128();
        let mask = if (stored & DEC128_SIGN_MASK) != 0 {
            DEC128_SIGN_MASK
        } else {
            DEC128_COMPLEMENT
        };
        Decimal128::from_bytes((stored ^ mask).to_le_bytes())
    }
}

impl TdbDataType for Decimal128 {
    fn datatype() -> Datatype {
        Datatype::Decimal128
    }
}

macro_rules! stringy_type {
($ty:ident) => {
stringy_type!($ty, $ty);
Expand Down Expand Up @@ -1082,6 +1156,39 @@ macro_rules! biginty_type {
};
}

// Declares a public newtype wrapping a `u64` and wires it into the typed
// dictionary traits. The newtype's lexical form is whatever `u64` itself
// produces and consumes; this macro only delegates.
//
// `u64y_type!(Foo)` uses `Datatype::Foo`; `u64y_type!(Foo, Bar)` declares
// `Foo` but tags it with `Datatype::Bar`.
macro_rules! u64y_type {
    // One-argument form: the type and the `Datatype` variant share a name.
    ($ty:ident) => {
        u64y_type!($ty, $ty);
    };
    ($ty:ident, $datatype:ident) => {
        #[derive(Debug, PartialEq)]
        pub struct $ty(pub u64);

        impl TdbDataType for $ty {
            fn datatype() -> Datatype {
                Datatype::$datatype
            }
        }

        // Encoding delegates to the wrapped integer.
        impl ToLexical<$ty> for $ty {
            fn to_lexical(&self) -> Bytes {
                self.0.to_lexical()
            }
        }

        // Decoding into the newtype: decode a plain `u64`, then wrap it.
        impl FromLexical<$ty> for $ty {
            fn from_lexical<B: Buf>(b: B) -> Self {
                let raw = <u64 as FromLexical<u64>>::from_lexical(b);
                Self(raw)
            }
        }

        // Decoding straight into a bare `u64` for callers that do not want
        // the wrapper.
        impl FromLexical<$ty> for u64 {
            fn from_lexical<B: Buf>(b: B) -> Self {
                <u64 as FromLexical<u64>>::from_lexical(b)
            }
        }
    };
}

stringy_type!(LangString);
stringy_type!(NCName);
stringy_type!(Name);
Expand All @@ -1098,7 +1205,13 @@ stringy_type!(Entity);

stringy_type!(AnySimpleType);

stringy_type!(Regex);
stringy_type!(Javascript);

biginty_type!(PositiveInteger);
biginty_type!(NonNegativeInteger);
biginty_type!(NegativeInteger);
biginty_type!(NonPositiveInteger);

u64y_type!(TimeStamp64);
u64y_type!(BSONTimeStamp);
150 changes: 149 additions & 1 deletion src/structure/tfc/typed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,11 +498,12 @@ impl<B1: BufMut, B2: BufMut, B3: BufMut, B4: BufMut> TypedDictBufBuilder<B1, B2,

#[cfg(test)]
mod tests {
use bson::Decimal128;
use bytes::BytesMut;
use chrono::{NaiveDate, NaiveDateTime};
use rug::Integer;

use crate::structure::Decimal;
use crate::structure::{BSONObjectId, Decimal};

use super::*;
fn build_multiple_segments<
Expand Down Expand Up @@ -1344,4 +1345,151 @@ mod tests {
"2002-11-04T11:30:12.000000003Z".to_string()
)
}

/// Builds a typed dictionary from `u64` entries and checks that iterating it
/// yields every value back, in ascending order.
#[test]
fn test_blah() {
    let mut typed_builder = TypedDictBufBuilder::new(
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
    );

    let mut vec = vec![
        u64::make_entry(&42),
        u64::make_entry(&43),
        u64::make_entry(&44),
        u64::make_entry(&41),
        u64::make_entry(&25),
    ];
    // The builder is fed entries in sorted order.
    vec.sort();
    typed_builder.add_all(vec.into_iter());
    let (b1, b2, b3, b4) = typed_builder.finalize();
    let dict = TypedDict::from_parts(b1.freeze(), b2.freeze(), b3.freeze(), b4.freeze());

    let values: Vec<u64> = dict.iter().map(|e| e.as_val::<u64, u64>()).collect();
    assert_eq!(values, vec![25, 41, 42, 43, 44]);
}

/// Builds a typed dictionary from ObjectId entries with no shared prefix and
/// checks that iteration returns the same entries in sorted order.
#[test]
fn test_bson_objectid() {
    let mut typed_builder = TypedDictBufBuilder::new(
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
    );

    let mut vec = vec![
        BSONObjectId::make_entry(&[42; 12]),
        BSONObjectId::make_entry(&[43; 12]),
        BSONObjectId::make_entry(&[44; 12]),
        BSONObjectId::make_entry(&[41; 12]),
        BSONObjectId::make_entry(&[25; 12]),
    ];
    // The builder is fed entries in sorted order.
    vec.sort();
    typed_builder.add_all(vec.into_iter());
    let (b1, b2, b3, b4) = typed_builder.finalize();
    let dict = TypedDict::from_parts(b1.freeze(), b2.freeze(), b3.freeze(), b4.freeze());

    // Expected entries, written out in ascending byte order.
    let expected: Vec<_> = [[25u8; 12], [41; 12], [42; 12], [43; 12], [44; 12]]
        .iter()
        .map(BSONObjectId::make_entry)
        .collect();

    let entries: Vec<_> = dict.iter().collect();
    assert_eq!(entries, expected);
}

/// Same as the plain ObjectId test, but every id starts with the same byte,
/// so consecutive entries share a one-byte prefix. Checks that the entries
/// still come back intact and in sorted order.
#[test]
fn test_bson_objectid_overlap() {
    let mut typed_builder = TypedDictBufBuilder::new(
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
    );

    let mut ids = [[42; 12], [43; 12], [44; 12], [41; 12], [25; 12]];
    for id in ids.iter_mut() {
        // Force a common first byte so the ids overlap.
        id[0] = 42;
    }

    let mut vec: Vec<_> = ids.iter().map(BSONObjectId::make_entry).collect();
    // The builder is fed entries in sorted order.
    vec.sort();
    typed_builder.add_all(vec.into_iter());
    let (b1, b2, b3, b4) = typed_builder.finalize();
    let dict = TypedDict::from_parts(b1.freeze(), b2.freeze(), b3.freeze(), b4.freeze());

    // Expected entries, ascending by the bytes after the shared prefix.
    let expected: Vec<_> = [25u8, 41, 42, 43, 44]
        .iter()
        .map(|&fill| {
            let mut id = [fill; 12];
            id[0] = 42;
            BSONObjectId::make_entry(&id)
        })
        .collect();

    let entries: Vec<_> = dict.iter().collect();
    assert_eq!(entries, expected);
}

/// Stores a mix of positive and negative decimals and checks that the
/// dictionary returns them in ascending numeric order, with each value
/// decoding back to the exact bit pattern it was parsed to.
#[test]
fn test_decimal128() {
    let mut typed_builder = TypedDictBufBuilder::new(
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
        BytesMut::new(),
    );

    let numbers: Vec<Decimal128> = [
        "0.1",
        "2.3",
        "0.00000028",
        "1000000",
        "4.2",
        "-1.3",
        "-12",
        "-0.0000005",
    ]
    .iter()
    .map(|n| n.parse().unwrap())
    .collect();

    let mut entries: Vec<_> = numbers.iter().map(Decimal128::make_entry).collect();
    // The builder is fed entries in sorted order.
    entries.sort();

    typed_builder.add_all(entries.into_iter());
    let (b1, b2, b3, b4) = typed_builder.finalize();
    let dict = TypedDict::from_parts(b1.freeze(), b2.freeze(), b3.freeze(), b4.freeze());

    // The same inputs, written out by hand in ascending numeric order.
    // Raw bytes are compared so the check does not depend on how
    // `Decimal128` formats itself.
    let expected: Vec<[u8; 16]> = [
        "-12",
        "-1.3",
        "-0.0000005",
        "0.00000028",
        "0.1",
        "2.3",
        "4.2",
        "1000000",
    ]
    .iter()
    .map(|n| n.parse::<Decimal128>().unwrap().bytes())
    .collect();

    let actual: Vec<[u8; 16]> = dict
        .iter()
        .map(|e| e.as_val::<Decimal128, Decimal128>().bytes())
        .collect();
    assert_eq!(actual, expected);
}
}