From 43934ab496c09d9aba61dc9c87684226b0c5b454 Mon Sep 17 00:00:00 2001 From: Tarek Date: Mon, 4 Nov 2024 18:27:11 +0200 Subject: [PATCH 1/2] feat!: initial support for `FixedSizeBinary` Signed-off-by: Tarek --- .../proof-of-sql/benches/bench_append_rows.rs | 12 +- .../arrow/arrow_array_to_column_conversion.rs | 18 +- .../base/arrow/column_arrow_conversions.rs | 2 + .../base/arrow/owned_and_arrow_conversions.rs | 23 ++- .../src/base/commitment/column_bounds.rs | 5 +- .../commitment/column_commitment_metadata.rs | 179 +++++++++++++++++- .../src/base/commitment/committable_column.rs | 136 +++++++++++++ .../src/base/commitment/naive_commitment.rs | 5 + .../proof-of-sql/src/base/database/column.rs | 128 ++++++++++++- .../src/base/database/column_operation.rs | 28 +++ .../base/database/column_operation_error.rs | 11 ++ .../src/base/database/filter_util.rs | 11 ++ .../src/base/database/group_by_util.rs | 13 +- .../src/base/database/owned_column.rs | 57 ++++++ .../base/database/owned_column_operation.rs | 142 ++++++++++++++ .../database/owned_table_test_accessor.rs | 4 + .../src/base/database/owned_table_utility.rs | 52 ++++- .../proof-of-sql/src/base/math/permutation.rs | 56 ++++++ .../base/polynomial/multilinear_extension.rs | 4 + .../src/base/scalar/mont_scalar.rs | 41 +++- crates/proof-of-sql/src/base/scalar/scalar.rs | 1 + .../dory/blitzar_metadata_table.rs | 4 +- .../dory/dory_commitment_helper_cpu.rs | 4 + .../dory/dory_compute_commitments_test.rs | 37 ++++ .../dynamic_dory_commitment_helper_cpu.rs | 4 + .../src/proof_primitive/dory/pack_scalars.rs | 3 +- .../src/sql/proof/provable_query_result.rs | 8 + .../src/sql/proof/provable_result_column.rs | 2 + .../src/sql/proof/verifiable_query_result.rs | 3 + docs/SQLSyntaxSpecification.md | 2 + 30 files changed, 967 insertions(+), 28 deletions(-) diff --git a/crates/proof-of-sql/benches/bench_append_rows.rs b/crates/proof-of-sql/benches/bench_append_rows.rs index 462ce404a..770f8f31c 100644 --- 
a/crates/proof-of-sql/benches/bench_append_rows.rs +++ b/crates/proof-of-sql/benches/bench_append_rows.rs @@ -13,8 +13,8 @@ use proof_of_sql::{ commitment::TableCommitment, database::{ owned_table_utility::{ - bigint, boolean, decimal75, int, int128, owned_table, scalar, smallint, - timestamptz, tinyint, varchar, + bigint, boolean, decimal75, fixed_size_binary, int, int128, owned_table, scalar, + smallint, timestamptz, tinyint, varchar, }, OwnedTable, }, @@ -89,6 +89,7 @@ pub fn generate_random_owned_table( "smallint", "int", "timestamptz", + "fixed_size_binary", ]; let mut columns = Vec::new(); @@ -124,6 +125,13 @@ pub fn generate_random_owned_table( PoSQLTimeZone::Utc, vec![rng.gen::(); num_rows], )), + "fixed_size_binary" => { + let byte_width = 16; + let data: Vec> = (0..num_rows) + .map(|_| (0..byte_width).map(|_| rng.gen::()).collect()) + .collect(); + columns.push(fixed_size_binary(identifier.deref(), byte_width, data)); + } _ => unreachable!(), } } diff --git a/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs index 15770f312..90a132d52 100644 --- a/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs @@ -2,9 +2,9 @@ use super::scalar_and_i256_conversions::convert_i256_to_scalar; use crate::base::{database::Column, math::decimal::Precision, scalar::Scalar}; use arrow::{ array::{ - Array, ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, - Int64Array, Int8Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, + Array, ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, + Int16Array, Int32Array, Int64Array, Int8Array, StringArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, }, 
datatypes::{i256, DataType, TimeUnit as ArrowTimeUnit}, }; @@ -375,6 +375,18 @@ impl ArrayRefExt for ArrayRef { }) } } + DataType::FixedSizeBinary(byte_width) => { + if let Some(array) = self.as_any().downcast_ref::() { + let start_byte = range.start * *byte_width as usize; + let end_byte = range.end * *byte_width as usize; + let byte_slice = &array.values()[start_byte..end_byte]; + Ok(Column::FixedSizeBinary(*byte_width, byte_slice)) + } else { + Err(ArrowArrayToColumnConversionError::UnsupportedType { + datatype: self.data_type().clone(), + }) + } + } data_type => Err(ArrowArrayToColumnConversionError::UnsupportedType { datatype: data_type.clone(), }), diff --git a/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs index 5eade6cf3..652764161 100644 --- a/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs @@ -31,6 +31,7 @@ impl From<&ColumnType> for DataType { }; DataType::Timestamp(arrow_timeunit, arrow_timezone) } + ColumnType::FixedSizeBinary(byte_width) => DataType::FixedSizeBinary(*byte_width), } } } @@ -63,6 +64,7 @@ impl TryFrom for ColumnType { )) } DataType::Utf8 => Ok(ColumnType::VarChar), + DataType::FixedSizeBinary(byte_width) => Ok(ColumnType::FixedSizeBinary(byte_width)), _ => Err(format!("Unsupported arrow data type {data_type:?}")), } } diff --git a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs index 74ad96839..19423bb3e 100644 --- a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs @@ -22,9 +22,9 @@ use crate::base::{ use alloc::sync::Arc; use arrow::{ array::{ - ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, - Int64Array, Int8Array, StringArray, TimestampMicrosecondArray, 
TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, + ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Int16Array, + Int32Array, Int64Array, Int8Array, StringArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, }, datatypes::{i256, DataType, Schema, SchemaRef, TimeUnit as ArrowTimeUnit}, error::ArrowError, @@ -109,6 +109,9 @@ impl From> for ArrayRef { PoSQLTimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from(col)), PoSQLTimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from(col)), }, + OwnedColumn::FixedSizeBinary(byte_width, col) => { + Arc::new(FixedSizeBinaryArray::new(byte_width, col.into(), None)) + } } } } @@ -149,6 +152,11 @@ impl TryFrom<&ArrayRef> for OwnedColumn { /// - `Decimal128Array` when converting from `DataType::Decimal128(38, 0)`. /// - `Decimal256Array` when converting from `DataType::Decimal256` if precision is less than or equal to 75. /// - `StringArray` when converting from `DataType::Utf8`. + /// - `TimestampSecondArray` when converting from `DataType::Timestamp(ArrowTimeUnit::Second)`. + /// - `TimestampMillisecondArray` when converting from `DataType::Timestamp(ArrowTimeUnit::Millisecond)`. + /// - `TimestampMicrosecondArray` when converting from `DataType::Timestamp(ArrowTimeUnit::Microsecond)`. + /// - `TimestampNanosecondArray` when converting from `DataType::Timestamp(ArrowTimeUnit::Nanosecond)`. + /// - `FixedSizeBinaryArray` when converting from `DataType::FixedSizeBinary`. fn try_from(value: &ArrayRef) -> Result { match &value.data_type() { // Arrow uses a bit-packed representation for booleans. 
@@ -224,6 +232,15 @@ impl TryFrom<&ArrayRef> for OwnedColumn { .map(|s| s.unwrap().to_string()) .collect(), )), + DataType::FixedSizeBinary(byte_width) => Ok(Self::FixedSizeBinary( + *byte_width, + value + .as_any() + .downcast_ref::() + .unwrap() + .values() + .to_vec(), + )), DataType::Timestamp(time_unit, timezone) => match time_unit { ArrowTimeUnit::Second => { let array = value diff --git a/crates/proof-of-sql/src/base/commitment/column_bounds.rs b/crates/proof-of-sql/src/base/commitment/column_bounds.rs index ce6328610..5e710487e 100644 --- a/crates/proof-of-sql/src/base/commitment/column_bounds.rs +++ b/crates/proof-of-sql/src/base/commitment/column_bounds.rs @@ -196,7 +196,7 @@ pub struct ColumnBoundsMismatch { /// Column metadata storing the bounds for column types that have order. /// -/// Other Ord column variants do exist (like Scalar/Boolean). +/// Other Ord column variants do exist (like Scalar/Boolean, `FixedSizeBinary`, etc). /// However, bounding these is useless unless we are performing indexing on these columns. /// This functionality only be considered after we support them in the user-facing sql. 
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] @@ -236,7 +236,8 @@ impl ColumnBounds { | CommittableColumn::Decimal75(_, _, _) | CommittableColumn::Scalar(_) | CommittableColumn::VarChar(_) - | CommittableColumn::RangeCheckWord(_) => ColumnBounds::NoOrder, + | CommittableColumn::RangeCheckWord(_) + | CommittableColumn::FixedSizeBinary(_, _) => ColumnBounds::NoOrder, } } diff --git a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs index 75b49f0bb..b031a7649 100644 --- a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs +++ b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs @@ -54,7 +54,8 @@ impl ColumnCommitmentMetadata { ColumnType::Boolean | ColumnType::VarChar | ColumnType::Scalar - | ColumnType::Decimal75(..), + | ColumnType::Decimal75(..) + | ColumnType::FixedSizeBinary(_), ColumnBounds::NoOrder, ) => Ok(ColumnCommitmentMetadata { column_type, @@ -286,6 +287,18 @@ mod tests { bounds: ColumnBounds::NoOrder } ); + + assert_eq!( + ColumnCommitmentMetadata::try_new( + ColumnType::FixedSizeBinary(16), + ColumnBounds::NoOrder + ) + .unwrap(), + ColumnCommitmentMetadata { + column_type: ColumnType::FixedSizeBinary(16), + bounds: ColumnBounds::NoOrder + } + ); } #[test] @@ -373,6 +386,21 @@ mod tests { ), Err(InvalidColumnCommitmentMetadata::TypeBoundsMismatch { .. }) )); + + assert!(matches!( + ColumnCommitmentMetadata::try_new( + ColumnType::FixedSizeBinary(16), + ColumnBounds::BigInt(Bounds::Empty) + ), + Err(InvalidColumnCommitmentMetadata::TypeBoundsMismatch { .. }) + )); + assert!(matches!( + ColumnCommitmentMetadata::try_new( + ColumnType::FixedSizeBinary(16), + ColumnBounds::Int128(Bounds::Empty) + ), + Err(InvalidColumnCommitmentMetadata::TypeBoundsMismatch { .. 
}) + )); } #[test] @@ -427,6 +455,26 @@ mod tests { assert_eq!(varchar_metadata.column_type(), &ColumnType::VarChar); assert_eq!(varchar_metadata.bounds(), &ColumnBounds::NoOrder); + let byte_width = 16; + let fixed_size_binary_data = [ + vec![0u8; byte_width], + vec![1u8; byte_width], + vec![2u8; byte_width], + ]; + let fixed_size_binary_column = OwnedColumn::::FixedSizeBinary( + byte_width as i32, + fixed_size_binary_data.concat(), + ); + let committable_fixed_size_binary_column = + CommittableColumn::from(&fixed_size_binary_column); + let fixed_size_binary_metadata = + ColumnCommitmentMetadata::from_column(&committable_fixed_size_binary_column); + assert_eq!( + fixed_size_binary_metadata.column_type(), + &ColumnType::FixedSizeBinary(byte_width as i32) + ); + assert_eq!(fixed_size_binary_metadata.bounds(), &ColumnBounds::NoOrder); + let bigint_column = OwnedColumn::::BigInt([1, 2, 3, 1, 0].to_vec()); let committable_bigint_column = CommittableColumn::from(&bigint_column); let bigint_metadata = ColumnCommitmentMetadata::from_column(&committable_bigint_column); @@ -998,5 +1046,134 @@ mod tests { assert!(timestamp_tz_metadata_b .try_difference(timestamp_tz_metadata_a) .is_err()); + + let fixed_size_binary_metadata = ColumnCommitmentMetadata { + column_type: ColumnType::FixedSizeBinary(16), // Example byte width + bounds: ColumnBounds::NoOrder, + }; + + // Ensure FixedSizeBinary cannot be unioned with other types + assert!(fixed_size_binary_metadata + .try_union(boolean_metadata) + .is_err()); + assert!(boolean_metadata + .try_union(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_union(varchar_metadata) + .is_err()); + assert!(varchar_metadata + .try_union(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_union(scalar_metadata) + .is_err()); + assert!(scalar_metadata + .try_union(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_union(tinyint_metadata) 
+ .is_err()); + assert!(tinyint_metadata + .try_union(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_union(smallint_metadata) + .is_err()); + assert!(smallint_metadata + .try_union(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata.try_union(int_metadata).is_err()); + assert!(int_metadata.try_union(fixed_size_binary_metadata).is_err()); + + assert!(fixed_size_binary_metadata + .try_union(bigint_metadata) + .is_err()); + assert!(bigint_metadata + .try_union(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_union(int128_metadata) + .is_err()); + assert!(int128_metadata + .try_union(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_union(decimal75_metadata) + .is_err()); + assert!(decimal75_metadata + .try_union(fixed_size_binary_metadata) + .is_err()); + + // Ensure FixedSizeBinary cannot be differenced with other types + assert!(fixed_size_binary_metadata + .try_difference(boolean_metadata) + .is_err()); + assert!(boolean_metadata + .try_difference(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_difference(varchar_metadata) + .is_err()); + assert!(varchar_metadata + .try_difference(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_difference(scalar_metadata) + .is_err()); + assert!(scalar_metadata + .try_difference(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_difference(tinyint_metadata) + .is_err()); + assert!(tinyint_metadata + .try_difference(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_difference(smallint_metadata) + .is_err()); + assert!(smallint_metadata + .try_difference(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_difference(int_metadata) + .is_err()); + assert!(int_metadata + 
.try_difference(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_difference(bigint_metadata) + .is_err()); + assert!(bigint_metadata + .try_difference(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_difference(int128_metadata) + .is_err()); + assert!(int128_metadata + .try_difference(fixed_size_binary_metadata) + .is_err()); + + assert!(fixed_size_binary_metadata + .try_difference(decimal75_metadata) + .is_err()); + assert!(decimal75_metadata + .try_difference(fixed_size_binary_metadata) + .is_err()); } } diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index 58f937a09..50db54e91 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -46,11 +46,15 @@ pub enum CommittableColumn<'a> { /// Borrowed byte column, mapped to `u8`. This is not a `PoSQL` /// type, we need this to commit to words in the range check. RangeCheckWord(&'a [u8]), + /// Borrowed `FixedSizeBinary` column, mapped to a slice of bytes. + /// - The i32 specifies the number of bytes per value. + FixedSizeBinary(i32, &'a [u8]), } impl<'a> CommittableColumn<'a> { /// Returns the length of the column. 
#[must_use] + #[allow(clippy::missing_panics_doc)] pub fn len(&self) -> usize { match self { CommittableColumn::TinyInt(col) => col.len(), @@ -63,6 +67,10 @@ impl<'a> CommittableColumn<'a> { | CommittableColumn::VarChar(col) => col.len(), CommittableColumn::Boolean(col) => col.len(), CommittableColumn::RangeCheckWord(col) => col.len(), + CommittableColumn::FixedSizeBinary(byte_width, col) => { + assert!(*byte_width > 0, "Byte width must be greater than zero"); + col.len() / *byte_width as usize + } } } @@ -94,6 +102,7 @@ impl<'a> From<&CommittableColumn<'a>> for ColumnType { CommittableColumn::VarChar(_) => ColumnType::VarChar, CommittableColumn::Boolean(_) => ColumnType::Boolean, CommittableColumn::TimestampTZ(tu, tz, _) => ColumnType::TimestampTZ(*tu, *tz), + CommittableColumn::FixedSizeBinary(size, _) => ColumnType::FixedSizeBinary(*size), CommittableColumn::RangeCheckWord(_) => { unimplemented!("Range check words are not a column type.") } @@ -120,6 +129,9 @@ impl<'a, S: Scalar> From<&Column<'a, S>> for CommittableColumn<'a> { CommittableColumn::VarChar(as_limbs) } Column::TimestampTZ(tu, tz, times) => CommittableColumn::TimestampTZ(*tu, *tz, times), + Column::FixedSizeBinary(byte_width, bytes) => { + CommittableColumn::FixedSizeBinary(*byte_width, bytes) + } } } } @@ -159,6 +171,9 @@ impl<'a, S: Scalar> From<&'a OwnedColumn> for CommittableColumn<'a> { OwnedColumn::TimestampTZ(tu, tz, times) => { CommittableColumn::TimestampTZ(*tu, *tz, times as &[_]) } + OwnedColumn::FixedSizeBinary(byte_width, bytes) => { + CommittableColumn::FixedSizeBinary(*byte_width, bytes) + } } } } @@ -221,6 +236,8 @@ impl<'a, 'b> From<&'a CommittableColumn<'b>> for Sequence<'a> { CommittableColumn::Boolean(bools) => Sequence::from(*bools), CommittableColumn::TimestampTZ(_, _, times) => Sequence::from(*times), CommittableColumn::RangeCheckWord(words) => Sequence::from(*words), + // FIXME: Is this the correct way to convert a FixedSizeBinary column to a Sequence? 
+ CommittableColumn::FixedSizeBinary(_, bytes) => Sequence::from(*bytes), } } } @@ -405,6 +422,37 @@ mod tests { assert_eq!(bigint_committable_column.column_type(), ColumnType::VarChar); } + #[test] + fn we_can_get_type_and_length_of_fixed_size_binary_column() { + let byte_width = 16; // Example byte width + + // Empty case + let fixed_size_binary_committable_column = + CommittableColumn::FixedSizeBinary(byte_width, &[]); + assert_eq!(fixed_size_binary_committable_column.len(), 0); + assert!(fixed_size_binary_committable_column.is_empty()); + assert_eq!( + fixed_size_binary_committable_column.column_type(), + ColumnType::FixedSizeBinary(byte_width) + ); + + // Non-empty case + let fixed_size_binary_data = [ + vec![0u8; byte_width as usize], + vec![1u8; byte_width as usize], + vec![2u8; byte_width as usize], + ]; + let concatenated_data: Vec = fixed_size_binary_data.concat(); + let fixed_size_binary_committable_column = + CommittableColumn::FixedSizeBinary(byte_width, &concatenated_data); + assert_eq!(fixed_size_binary_committable_column.len(), 3); + assert!(!fixed_size_binary_committable_column.is_empty()); + assert_eq!( + fixed_size_binary_committable_column.column_type(), + ColumnType::FixedSizeBinary(byte_width) + ); + } + #[test] fn we_can_get_type_and_length_of_scalar_column() { // empty case @@ -590,6 +638,35 @@ mod tests { ); } + #[test] + fn we_can_convert_from_borrowing_fixed_size_binary_column() { + let byte_width = 16; + + // Empty case + let from_borrowed_column = CommittableColumn::from( + &Column::::FixedSizeBinary(byte_width, &[]), + ); + assert_eq!( + from_borrowed_column, + CommittableColumn::FixedSizeBinary(byte_width, &[]) + ); + + // Non-empty case + let fixed_size_binary_data = [ + vec![0u8; byte_width as usize], + vec![1u8; byte_width as usize], + vec![2u8; byte_width as usize], + ]; + let concatenated_data: Vec = fixed_size_binary_data.concat(); + let from_borrowed_column = CommittableColumn::from( + &Column::::FixedSizeBinary(byte_width, 
&concatenated_data), + ); + assert_eq!( + from_borrowed_column, + CommittableColumn::FixedSizeBinary(byte_width, &concatenated_data) + ); + } + #[test] fn we_can_convert_from_borrowing_scalar_column() { // empty case @@ -732,6 +809,34 @@ mod tests { ); } + #[test] + fn we_can_convert_from_owned_fixed_size_binary_column() { + let byte_width = 16; + + // Empty case + let owned_column = OwnedColumn::::FixedSizeBinary(byte_width, Vec::new()); + let from_owned_column = CommittableColumn::from(&owned_column); + assert_eq!( + from_owned_column, + CommittableColumn::FixedSizeBinary(byte_width, &[]) + ); + + // Non-empty case + let fixed_size_binary_data = [ + vec![0u8; byte_width as usize], + vec![1u8; byte_width as usize], + vec![2u8; byte_width as usize], + ]; + let concatenated_data: Vec = fixed_size_binary_data.concat(); + let owned_column = + OwnedColumn::::FixedSizeBinary(byte_width, concatenated_data.clone()); + let from_owned_column = CommittableColumn::from(&owned_column); + assert_eq!( + from_owned_column, + CommittableColumn::FixedSizeBinary(byte_width, &concatenated_data) + ); + } + #[test] fn we_can_convert_from_owned_scalar_column() { // empty case @@ -964,6 +1069,37 @@ mod tests { assert_eq!(commitment_buffer[0], commitment_buffer[1]); } + #[test] + fn we_can_commit_to_fixed_size_binary_column_through_committable_column() { + let byte_width = 16; + + // Empty case + let committable_column = CommittableColumn::FixedSizeBinary(byte_width, &[]); + let sequence = Sequence::from(&committable_column); + let mut commitment_buffer = [CompressedRistretto::default()]; + compute_curve25519_commitments(&mut commitment_buffer, &[sequence], 0); + assert_eq!(commitment_buffer[0], CompressedRistretto::default()); + + // Non-empty case + let fixed_size_binary_data = [ + vec![0u8; byte_width as usize], + vec![1u8; byte_width as usize], + vec![2u8; byte_width as usize], + ]; + let concatenated_data: Vec = fixed_size_binary_data.concat(); + let committable_column = 
CommittableColumn::FixedSizeBinary(byte_width, &concatenated_data); + + let sequence_actual = Sequence::from(&committable_column); + let sequence_expected = Sequence::from(concatenated_data.as_slice()); + let mut commitment_buffer = [CompressedRistretto::default(); 2]; + compute_curve25519_commitments( + &mut commitment_buffer, + &[sequence_actual, sequence_expected], + 0, + ); + assert_eq!(commitment_buffer[0], commitment_buffer[1]); + } + #[test] fn we_can_commit_to_scalar_column_through_committable_column() { // empty case diff --git a/crates/proof-of-sql/src/base/commitment/naive_commitment.rs b/crates/proof-of-sql/src/base/commitment/naive_commitment.rs index 6c6e3f078..3202168a0 100644 --- a/crates/proof-of-sql/src/base/commitment/naive_commitment.rs +++ b/crates/proof-of-sql/src/base/commitment/naive_commitment.rs @@ -153,6 +153,11 @@ impl Commitment for NaiveCommitment { .iter() .map(core::convert::Into::into) .collect(), + CommittableColumn::FixedSizeBinary(byte_width, bytes) => bytes + // Chunk the bytes into `byte_width`-sized chunks and convert each chunk into a scalar + .chunks(*byte_width as usize) + .map(TestScalar::from) + .collect(), }; vectors.append(&mut existing_scalars); NaiveCommitment(vectors) diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index be536b1d5..34b551e64 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -52,6 +52,9 @@ pub enum Column<'a, S: Scalar> { /// - the second element maps to a timezone /// - the third element maps to columns of timeunits since unix epoch TimestampTZ(PoSQLTimeUnit, PoSQLTimeZone, &'a [i64]), + /// Fixed size binary columns + /// - the i32 specifies the number of bytes per value + FixedSizeBinary(i32, &'a [u8]), } impl<'a, S: Scalar> Column<'a, S> { @@ -71,6 +74,7 @@ impl<'a, S: Scalar> Column<'a, S> { Self::TimestampTZ(time_unit, timezone, _) => { 
ColumnType::TimestampTZ(*time_unit, *timezone) } + Self::FixedSizeBinary(byte_width, _) => ColumnType::FixedSizeBinary(*byte_width), } } /// Returns the length of the column. @@ -90,6 +94,11 @@ impl<'a, S: Scalar> Column<'a, S> { } Self::Int128(col) => col.len(), Self::Scalar(col) | Self::Decimal75(_, _, col) => col.len(), + Self::FixedSizeBinary(byte_width, col) => { + // The length of the column is the length of the byte array divided by the byte width + assert!(*byte_width > 0, "Byte width must be greater than zero"); + col.len() / *byte_width as usize + } } } /// Returns `true` if the column has no elements. @@ -164,6 +173,9 @@ impl<'a, S: Scalar> Column<'a, S> { )) } OwnedColumn::TimestampTZ(tu, tz, col) => Column::TimestampTZ(*tu, *tz, col.as_slice()), + OwnedColumn::FixedSizeBinary(byte_width, col) => { + Column::FixedSizeBinary(*byte_width, col.as_slice()) + } } } @@ -189,6 +201,16 @@ impl<'a, S: Scalar> Column<'a, S> { Self::TimestampTZ(_, _, col) => { alloc.alloc_slice_fill_with(col.len(), |i| S::from(col[i])) } + Self::FixedSizeBinary(byte_width, col) => { + // The length of the column is the length of the byte array divided by the byte width + let num_elements = col.len() / *byte_width as usize; + // Create a slice of scalars from the byte array + alloc.alloc_slice_fill_with(num_elements, |i| { + let start = i * *byte_width as usize; + let end = start + *byte_width as usize; + S::from(&col[start..end]) + }) + } } } @@ -205,6 +227,11 @@ impl<'a, S: Scalar> Column<'a, S> { Self::Int128(col) => S::from(col[index]), Self::Scalar(col) | Self::Decimal75(_, _, col) => col[index], Self::VarChar((_, scals)) => scals[index], + Self::FixedSizeBinary(byte_width, col) => { + let start = index * *byte_width as usize; + let end = start + *byte_width as usize; + S::from(&col[start..end]) + } }) } @@ -223,6 +250,16 @@ impl<'a, S: Scalar> Column<'a, S> { Self::Int128(col) => slice_cast_with(col, |i| S::from(i) * scale_factor), Self::Scalar(col) => slice_cast_with(col, 
|i| S::from(i) * scale_factor), Self::TimestampTZ(_, _, col) => slice_cast_with(col, |i| S::from(i) * scale_factor), + Self::FixedSizeBinary(byte_width, col) => { + let num_elements = col.len() / byte_width as usize; + (0..num_elements) + .map(|i| { + let start = i * byte_width as usize; + let end = start + byte_width as usize; + S::from(&col[start..end]) * scale_factor + }) + .collect() + } } } } @@ -264,6 +301,9 @@ pub enum ColumnType { /// Mapped to [`Curve25519Scalar`](crate::base::scalar::Curve25519Scalar) #[serde(alias = "SCALAR", alias = "scalar")] Scalar, + /// Mapped to fixed size binary + #[serde(alias = "FIXEDSIZEBINARY", alias = "fixedsizebinary")] + FixedSizeBinary(i32), } impl ColumnType { @@ -350,7 +390,7 @@ impl ColumnType { // Scalars are not in database & are only used for typeless comparisons for testing so we return 0 // so that they do not cause errors when used in comparisons. Self::Scalar => Some(0_u8), - Self::Boolean | Self::VarChar => None, + Self::Boolean | Self::VarChar | Self::FixedSizeBinary(_) => None, } } /// Returns scale of a [`ColumnType`] if it is convertible to a decimal wrapped in `Some()`. Otherwise return None. 
@@ -364,7 +404,7 @@ impl ColumnType { | Self::BigInt | Self::Int128 | Self::Scalar => Some(0), - Self::Boolean | Self::VarChar => None, + Self::Boolean | Self::VarChar | Self::FixedSizeBinary(_) => None, Self::TimestampTZ(tu, _) => match tu { PoSQLTimeUnit::Second => Some(0), PoSQLTimeUnit::Millisecond => Some(3), @@ -385,6 +425,7 @@ impl ColumnType { Self::BigInt | Self::TimestampTZ(_, _) => size_of::(), Self::Int128 => size_of::(), Self::Scalar | Self::Decimal75(_, _) | Self::VarChar => size_of::<[u64; 4]>(), + Self::FixedSizeBinary(byte_width) => *byte_width as usize, } } @@ -405,7 +446,11 @@ impl ColumnType { | Self::BigInt | Self::Int128 | Self::TimestampTZ(_, _) => true, - Self::Decimal75(_, _) | Self::Scalar | Self::VarChar | Self::Boolean => false, + Self::Decimal75(_, _) + | Self::Scalar + | Self::VarChar + | Self::Boolean + | Self::FixedSizeBinary(_) => false, } } } @@ -432,6 +477,7 @@ impl Display for ColumnType { ColumnType::TimestampTZ(timeunit, timezone) => { write!(f, "TIMESTAMP(TIMEUNIT: {timeunit}, TIMEZONE: {timezone})") } + ColumnType::FixedSizeBinary(byte_width) => write!(f, "FIXEDSIZEBINARY({byte_width})"), } } } @@ -551,6 +597,10 @@ mod tests { let column_type = ColumnType::Decimal75(Precision::new(1).unwrap(), 0); let serialized = serde_json::to_string(&column_type).unwrap(); assert_eq!(serialized, r#"{"Decimal75":[1,0]}"#); + + let column_type = ColumnType::FixedSizeBinary(4); + let serialized = serde_json::to_string(&column_type).unwrap(); + assert_eq!(serialized, r#"{"FixedSizeBinary":4}"#); } #[test] @@ -617,6 +667,10 @@ mod tests { let expected_column_type = ColumnType::Decimal75(Precision::new(1).unwrap(), 0); let deserialized: ColumnType = serde_json::from_str(r#"{"Decimal75":[1, 0]}"#).unwrap(); assert_eq!(deserialized, expected_column_type); + + let expected_column_type = ColumnType::FixedSizeBinary(4); + let deserialized: ColumnType = serde_json::from_str(r#"{"FixedSizeBinary":4}"#).unwrap(); + assert_eq!(deserialized, 
expected_column_type); } #[test] @@ -706,6 +760,15 @@ mod tests { serde_json::from_str::(r#"{"DECIMAL75":[1,-128]}"#).unwrap(), ColumnType::Decimal75(Precision::new(1).unwrap(), -128) ); + + assert_eq!( + serde_json::from_str::(r#"{"FixedSizeBinary":4}"#).unwrap(), + ColumnType::FixedSizeBinary(4) + ); + assert_eq!( + serde_json::from_str::(r#"{"fixedsizebinary":4}"#).unwrap(), + ColumnType::FixedSizeBinary(4) + ); } #[test] @@ -740,6 +803,10 @@ mod tests { let deserialized: Result = serde_json::from_str(r#""ScaLar""#); assert!(deserialized.is_err()); + + // FixedSizeBinary requires a number + let deserialized: Result = serde_json::from_str(r#""FixedSizeBinary""#); + assert!(deserialized.is_err()); } #[test] @@ -812,6 +879,14 @@ mod tests { serde_json::from_str::(&decimal75_json).unwrap(), decimal75 ); + + let fixedsizebinary = ColumnType::FixedSizeBinary(4); + let fixedsizebinary_json = serde_json::to_string(&fixedsizebinary).unwrap(); + assert_eq!(fixedsizebinary_json, r#"{"FixedSizeBinary":4}"#); + assert_eq!( + serde_json::from_str::(&fixedsizebinary_json).unwrap(), + fixedsizebinary + ); } #[test] @@ -869,6 +944,18 @@ mod tests { assert_eq!(column.len(), 3); assert!(!column.is_empty()); + let byte_width = 16; + let fixed_size_binary_data = [ + vec![0u8; byte_width], + vec![1u8; byte_width], + vec![2u8; byte_width], + ]; + let concatenated_data: Vec = fixed_size_binary_data.concat(); + let column = + Column::::FixedSizeBinary(byte_width as i32, &concatenated_data); + assert_eq!(column.len(), 3); + assert!(!column.is_empty()); + // Test empty columns let column = Column::::Boolean(&[]); assert_eq!(column.len(), 0); @@ -905,6 +992,11 @@ mod tests { let column: Column<'_, Curve25519Scalar> = Column::Decimal75(precision, scale, &[]); assert_eq!(column.len(), 0); assert!(column.is_empty()); + + let byte_width = 16; + let column = Column::::FixedSizeBinary(byte_width, &[]); + assert_eq!(column.len(), 0); + assert!(column.is_empty()); } #[test] @@ -956,6 +1048,24 @@ 
mod tests { ); let new_owned_col = (&col).into(); assert_eq!(owned_col, new_owned_col); + + // FixedSizeBinary + let byte_width = 16; + let fixed_size_binary_data = [ + vec![0u8; byte_width], + vec![1u8; byte_width], + vec![2u8; byte_width], + ]; + let concatenated_data: Vec = fixed_size_binary_data.concat(); + let owned_col: OwnedColumn = + OwnedColumn::FixedSizeBinary(byte_width as i32, concatenated_data.clone()); + let col = Column::::from_owned_column(&owned_col, &alloc); + assert_eq!( + col, + Column::FixedSizeBinary(byte_width as i32, &concatenated_data) + ); + let new_owned_col = (&col).into(); + assert_eq!(owned_col, new_owned_col); } #[test] @@ -1015,5 +1125,17 @@ mod tests { Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[1, 2, 3]); assert_eq!(column.column_type().byte_size(), 8); assert_eq!(column.column_type().bit_size(), 64); + + let byte_width: usize = 16; + let fixed_size_binary_data = [ + vec![0u8; byte_width], + vec![1u8; byte_width], + vec![2u8; byte_width], + ]; + let concatenated_data: Vec = fixed_size_binary_data.concat(); + let column = + Column::::FixedSizeBinary(byte_width as i32, &concatenated_data); + assert_eq!(column.column_type().byte_size(), byte_width); + assert_eq!(column.column_type().bit_size(), (byte_width * 8) as u32); } } diff --git a/crates/proof-of-sql/src/base/database/column_operation.rs b/crates/proof-of-sql/src/base/database/column_operation.rs index ed82dc03b..10d5c7285 100644 --- a/crates/proof-of-sql/src/base/database/column_operation.rs +++ b/crates/proof-of-sql/src/base/database/column_operation.rs @@ -1085,6 +1085,13 @@ mod test { try_add_subtract_column_types(lhs, rhs, BinaryOperator::Add), Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) )); + + let lhs = ColumnType::FixedSizeBinary(10); + let rhs = ColumnType::FixedSizeBinary(10); + assert!(matches!( + try_add_subtract_column_types(lhs, rhs, BinaryOperator::Add), + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. 
}) + )); } #[test] @@ -1220,6 +1227,13 @@ mod test { try_add_subtract_column_types(lhs, rhs, BinaryOperator::Subtract), Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) )); + + let lhs = ColumnType::FixedSizeBinary(10); + let rhs = ColumnType::FixedSizeBinary(10); + assert!(matches!( + try_add_subtract_column_types(lhs, rhs, BinaryOperator::Subtract), + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); } #[test] @@ -1355,6 +1369,13 @@ mod test { try_multiply_column_types(lhs, rhs), Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) )); + + let lhs = ColumnType::FixedSizeBinary(10); + let rhs = ColumnType::FixedSizeBinary(10); + assert!(matches!( + try_multiply_column_types(lhs, rhs), + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); } #[test] @@ -1517,6 +1538,13 @@ mod test { try_divide_column_types(lhs, rhs), Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) )); + + let lhs = ColumnType::FixedSizeBinary(10); + let rhs = ColumnType::FixedSizeBinary(10); + assert!(matches!( + try_divide_column_types(lhs, rhs), + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. 
}) + )); } #[test] diff --git a/crates/proof-of-sql/src/base/database/column_operation_error.rs b/crates/proof-of-sql/src/base/database/column_operation_error.rs index 7be9d2fcc..e56390b1c 100644 --- a/crates/proof-of-sql/src/base/database/column_operation_error.rs +++ b/crates/proof-of-sql/src/base/database/column_operation_error.rs @@ -16,6 +16,17 @@ pub enum ColumnOperationError { len_b: usize, }, + /// Mismatched byte sizes in `FixedSizeBinary` columns + #[snafu(display( + "FixedSizeBinary columns have different byte sizes: {byte_size_a} != {byte_size_b}" + ))] + FixedSizeBinaryByteSizeMismatch { + /// Byte size of the first column + byte_size_a: i32, + /// Byte size of the second column + byte_size_b: i32, + }, + /// Incorrect `ColumnType` in binary operations #[snafu(display("{operator:?}(lhs: {left_type:?}, rhs: {right_type:?}) is not supported"))] BinaryOperationInvalidColumnType { diff --git a/crates/proof-of-sql/src/base/database/filter_util.rs b/crates/proof-of-sql/src/base/database/filter_util.rs index fe09f1284..77c403495 100644 --- a/crates/proof-of-sql/src/base/database/filter_util.rs +++ b/crates/proof-of-sql/src/base/database/filter_util.rs @@ -75,5 +75,16 @@ pub fn filter_column_by_index<'a, S: Scalar>( *tz, alloc.alloc_slice_fill_iter(indexes.iter().map(|&i| col[i])), ), + Column::FixedSizeBinary(byte_width, col) => { + // We need to filter the bytes by the indexes + let mut filtered_bytes = Vec::with_capacity(indexes.len() * *byte_width as usize); + for &i in indexes { + let start = i * *byte_width as usize; + let end = start + *byte_width as usize; + filtered_bytes.extend_from_slice(&col[start..end]); + } + let allocated_bytes = alloc.alloc_slice_copy(&filtered_bytes); + Column::FixedSizeBinary(*byte_width, allocated_bytes) + } } } diff --git a/crates/proof-of-sql/src/base/database/group_by_util.rs b/crates/proof-of-sql/src/base/database/group_by_util.rs index 920337c23..a06644cce 100644 --- 
a/crates/proof-of-sql/src/base/database/group_by_util.rs +++ b/crates/proof-of-sql/src/base/database/group_by_util.rs @@ -162,7 +162,10 @@ pub(crate) fn sum_aggregate_column_by_index_counts<'a, S: Scalar>( } Column::Scalar(col) => sum_aggregate_slice_by_index_counts(alloc, col, counts, indexes), // The following should never be reached because the `SUM` function can only be applied to numeric types. - Column::VarChar(_) | Column::TimestampTZ(_, _, _) | Column::Boolean(_) => { + Column::VarChar(_) + | Column::TimestampTZ(_, _, _) + | Column::Boolean(_) + | Column::FixedSizeBinary(_, _) => { unreachable!("SUM can not be applied to non-numeric types") } } @@ -197,6 +200,9 @@ pub(crate) fn max_aggregate_column_by_index_counts<'a, S: Scalar>( Column::VarChar(_) => { unreachable!("MAX can not be applied to varchar") } + Column::FixedSizeBinary(_, _) => { + unreachable!("MAX can not be applied to fixed size binary") + } } } @@ -229,6 +235,9 @@ pub(crate) fn min_aggregate_column_by_index_counts<'a, S: Scalar>( Column::VarChar(_) => { unreachable!("MIN can not be applied to varchar") } + Column::FixedSizeBinary(_, _) => { + unreachable!("MIN can not be applied to fixed size binary") + } } } @@ -375,6 +384,7 @@ pub(crate) fn compare_indexes_by_columns( Column::Decimal75(_, _, col) => col[i].signed_cmp(&col[j]), Column::Scalar(col) => col[i].cmp(&col[j]), Column::VarChar((col, _)) => col[i].cmp(col[j]), + Column::FixedSizeBinary(_, _) => unreachable!("FixedSizeBinary can't be ordered"), }) .find(|&ord| ord != Ordering::Equal) .unwrap_or(Ordering::Equal) @@ -401,6 +411,7 @@ pub(crate) fn compare_indexes_by_owned_columns( OwnedColumn::Decimal75(_, _, col) => col[i].signed_cmp(&col[j]), OwnedColumn::Scalar(col) => col[i].cmp(&col[j]), OwnedColumn::VarChar(col) => col[i].cmp(&col[j]), + OwnedColumn::FixedSizeBinary(_, _) => unreachable!("FixedSizeBinary can't be ordered"), }) .find(|&ord| ord != Ordering::Equal) .unwrap_or(Ordering::Equal) diff --git 
a/crates/proof-of-sql/src/base/database/owned_column.rs b/crates/proof-of-sql/src/base/database/owned_column.rs index cb5cc30f8..9a779b58c 100644 --- a/crates/proof-of-sql/src/base/database/owned_column.rs +++ b/crates/proof-of-sql/src/base/database/owned_column.rs @@ -44,11 +44,15 @@ pub enum OwnedColumn { Scalar(Vec), /// Timestamp columns TimestampTZ(PoSQLTimeUnit, PoSQLTimeZone, Vec), + /// Fixed size binary columns + /// - the i32 specifies the number of bytes per value + FixedSizeBinary(i32, Vec), } impl OwnedColumn { /// Returns the length of the column. #[must_use] + #[allow(clippy::missing_panics_doc)] pub fn len(&self) -> usize { match self { OwnedColumn::Boolean(col) => col.len(), @@ -59,6 +63,10 @@ impl OwnedColumn { OwnedColumn::VarChar(col) => col.len(), OwnedColumn::Int128(col) => col.len(), OwnedColumn::Decimal75(_, _, col) | OwnedColumn::Scalar(col) => col.len(), + OwnedColumn::FixedSizeBinary(byte_width, col) => { + assert!(*byte_width > 0, "Byte width must be greater than zero"); + col.len() / *byte_width as usize + } } } @@ -79,6 +87,10 @@ impl OwnedColumn { OwnedColumn::TimestampTZ(tu, tz, col) => { OwnedColumn::TimestampTZ(*tu, *tz, permutation.try_apply(col)?) 
} + OwnedColumn::FixedSizeBinary(byte_width, col) => OwnedColumn::FixedSizeBinary( + *byte_width, + permutation.try_chunked_apply(col, *byte_width as usize)?, + ), }) } @@ -100,6 +112,11 @@ impl OwnedColumn { OwnedColumn::TimestampTZ(tu, tz, col) => { OwnedColumn::TimestampTZ(*tu, *tz, col[start..end].to_vec()) } + OwnedColumn::FixedSizeBinary(byte_width, col) => { + let start_byte = start * *byte_width as usize; + let end_byte = end * *byte_width as usize; + OwnedColumn::FixedSizeBinary(*byte_width, col[start_byte..end_byte].to_vec()) + } } } @@ -115,6 +132,7 @@ impl OwnedColumn { OwnedColumn::VarChar(col) => col.is_empty(), OwnedColumn::Int128(col) => col.is_empty(), OwnedColumn::Scalar(col) | OwnedColumn::Decimal75(_, _, col) => col.is_empty(), + OwnedColumn::FixedSizeBinary(_, col) => col.is_empty(), } } /// Returns the type of the column. @@ -133,6 +151,7 @@ impl OwnedColumn { ColumnType::Decimal75(*precision, *scale) } OwnedColumn::TimestampTZ(tu, tz, _) => ColumnType::TimestampTZ(*tu, *tz), + OwnedColumn::FixedSizeBinary(size, _) => ColumnType::FixedSizeBinary(*size), } } @@ -207,6 +226,11 @@ impl OwnedColumn { })?; Ok(OwnedColumn::TimestampTZ(tu, tz, raw_values)) } + // Can not convert scalars to FixedSizeBinary + ColumnType::FixedSizeBinary(byte_width) => Err(OwnedColumnError::TypeCastError { + from_type: ColumnType::Scalar, + to_type: ColumnType::FixedSizeBinary(byte_width), + }), // Can not convert scalars to VarChar ColumnType::VarChar => Err(OwnedColumnError::TypeCastError { from_type: ColumnType::Scalar, @@ -321,6 +345,7 @@ impl<'a, S: Scalar> From<&Column<'a, S>> for OwnedColumn { } Column::Scalar(col) => OwnedColumn::Scalar(col.to_vec()), Column::TimestampTZ(tu, tz, col) => OwnedColumn::TimestampTZ(*tu, *tz, col.to_vec()), + Column::FixedSizeBinary(size, col) => OwnedColumn::FixedSizeBinary(*size, col.to_vec()), } } } @@ -347,6 +372,7 @@ pub(crate) fn compare_indexes_by_owned_columns_with_direction( OwnedColumn::Int128(col) => col[i].cmp(&col[j]), 
OwnedColumn::Decimal75(_, _, col) | OwnedColumn::Scalar(col) => col[i].cmp(&col[j]), OwnedColumn::VarChar(col) => col[i].cmp(&col[j]), + OwnedColumn::FixedSizeBinary(_, col) => col[i].cmp(&col[j]), }; match direction { OrderByDirection::Asc => ordering, @@ -381,6 +407,37 @@ mod test { ); } + #[test] + fn we_can_permute_a_fixed_size_binary_column() { + let byte_width = 4; + let col: OwnedColumn = OwnedColumn::FixedSizeBinary( + byte_width, + vec![ + 0x01, 0x02, 0x03, 0x04, // First element + 0x05, 0x06, 0x07, 0x08, // Second element + 0x09, 0x0A, 0x0B, 0x0C, // Third element + 0x0D, 0x0E, 0x0F, 0x10, // Fourth element + 0x11, 0x12, 0x13, 0x14, // Fifth element + ], + ); + + let permutation = Permutation::try_new(vec![1, 3, 4, 0, 2]).unwrap(); + + assert_eq!( + col.try_permute(&permutation).unwrap(), + OwnedColumn::FixedSizeBinary( + byte_width, + vec![ + 0x05, 0x06, 0x07, 0x08, // Second element + 0x0D, 0x0E, 0x0F, 0x10, // Fourth element + 0x11, 0x12, 0x13, 0x14, // Fifth element + 0x01, 0x02, 0x03, 0x04, // First element + 0x09, 0x0A, 0x0B, 0x0C, // Third element + ] + ) + ); + } + #[test] fn we_can_compare_columns() { let col1: OwnedColumn = OwnedColumn::SmallInt(vec![1, 1, 2, 1, 1]); diff --git a/crates/proof-of-sql/src/base/database/owned_column_operation.rs b/crates/proof-of-sql/src/base/database/owned_column_operation.rs index fba0f2032..55a5ecfbe 100644 --- a/crates/proof-of-sql/src/base/database/owned_column_operation.rs +++ b/crates/proof-of-sql/src/base/database/owned_column_operation.rs @@ -15,6 +15,7 @@ use crate::base::{ }, scalar::Scalar, }; +use alloc::vec::Vec; use core::ops::{Add, Div, Mul, Sub}; use proof_of_sql_parser::intermediate_ast::{BinaryOperator, UnaryOperator}; @@ -240,6 +241,27 @@ impl OwnedColumn { (Self::TimestampTZ(_, _, _), Self::TimestampTZ(_, _, _)) => { todo!("Implement equality check for TimeStampTZ") } + ( + Self::FixedSizeBinary(lhs_width, lhs_bytes), + Self::FixedSizeBinary(rhs_width, rhs_bytes), + ) => { + if lhs_width != 
rhs_width { + return Err(ColumnOperationError::FixedSizeBinaryByteSizeMismatch { + byte_size_a: *lhs_width, + byte_size_b: *rhs_width, + }); + } + // Split the bytes into chunks of the fixed size + let lhs_chunks = lhs_bytes.chunks(*lhs_width as usize); + let rhs_chunks = rhs_bytes.chunks(*rhs_width as usize); + + let result: Vec = lhs_chunks + .zip(rhs_chunks) + .map(|(lhs_chunk, rhs_chunk)| lhs_chunk == rhs_chunk) + .collect(); + + Ok(Self::Boolean(result)) + } _ => Err(ColumnOperationError::BinaryOperationInvalidColumnType { operator: BinaryOperator::Equal, left_type: self.column_type(), @@ -599,6 +621,15 @@ impl OwnedColumn { } (Self::Boolean(lhs), Self::Boolean(rhs)) => Ok(Self::Boolean(slice_ge(lhs, rhs))), (Self::Scalar(lhs), Self::Scalar(rhs)) => Ok(Self::Boolean(slice_ge(lhs, rhs))), + (Self::FixedSizeBinary(lhs_width, lhs), Self::FixedSizeBinary(rhs_width, rhs)) => { + if lhs_width != rhs_width { + return Err(ColumnOperationError::FixedSizeBinaryByteSizeMismatch { + byte_size_a: *lhs_width, + byte_size_b: *rhs_width, + }); + } + Ok(Self::Boolean(slice_eq(lhs, rhs))) + } (Self::TimestampTZ(_, _, _), Self::TimestampTZ(_, _, _)) => { todo!("Implement inequality check for TimeStampTZ") } @@ -796,6 +827,15 @@ impl Add for OwnedColumn { )?; Ok(Self::Decimal75(new_precision, new_scale, new_values)) } + (Self::FixedSizeBinary(lhs_width, lhs), Self::FixedSizeBinary(rhs_width, rhs)) => { + if lhs_width != rhs_width { + return Err(ColumnOperationError::FixedSizeBinaryByteSizeMismatch { + byte_size_a: *lhs_width, + byte_size_b: *rhs_width, + }); + } + Ok(Self::Boolean(slice_le(lhs, rhs))) + } _ => Err(ColumnOperationError::BinaryOperationInvalidColumnType { operator: BinaryOperator::Add, left_type: self.column_type(), @@ -1634,6 +1674,40 @@ mod test { true, false, true ])) ); + + let byte_width = 16; + let lhs_data = [ + vec![0u8; byte_width], + vec![1u8; byte_width], + vec![2u8; byte_width], + ]; + let rhs_data = [ + vec![0u8; byte_width], + vec![2u8; byte_width], + 
vec![2u8; byte_width], + ]; + + // Concatenate the data to match the expected format + let lhs_concatenated: Vec = lhs_data.concat(); + let rhs_concatenated: Vec = rhs_data.concat(); + + // Create OwnedColumn instances + let lhs = OwnedColumn::::FixedSizeBinary( + byte_width as i32, + lhs_concatenated.clone(), + ); + let rhs = OwnedColumn::::FixedSizeBinary( + byte_width as i32, + rhs_concatenated.clone(), + ); + + let result = lhs.element_wise_eq(&rhs); + assert_eq!( + result, + Ok(OwnedColumn::::Boolean(vec![ + true, false, true + ])) + ); } #[test] @@ -1868,6 +1942,37 @@ mod test { result, Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) )); + + let byte_width = 16; + let lhs = OwnedColumn::::FixedSizeBinary( + byte_width, + vec![0u8; byte_width as usize * 3], + ); + let rhs = OwnedColumn::::TinyInt(vec![1, 2, 3]); + let result = lhs.element_wise_le(&rhs); + assert!(matches!( + result, + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); + + let rhs = OwnedColumn::::Int(vec![1, 2, 3]); + let result = lhs.element_wise_le(&rhs); + assert!(matches!( + result, + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); + + let rhs = OwnedColumn::::VarChar( + ["Space", "and", "Time"] + .iter() + .map(std::string::ToString::to_string) + .collect(), + ); + let result = lhs.element_wise_le(&rhs); + assert!(matches!( + result, + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); } #[test] @@ -1906,6 +2011,43 @@ mod test { result, Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. 
}) )); + + let byte_width = 16; // Example byte width + + // FixedSizeBinary cannot be used in arithmetic operations + let lhs = OwnedColumn::::FixedSizeBinary( + byte_width, + vec![0u8; byte_width as usize * 3], + ); + let rhs = OwnedColumn::::Scalar(vec![ + Curve25519Scalar::from(1), + Curve25519Scalar::from(2), + Curve25519Scalar::from(3), + ]); + + let result = lhs.clone() + rhs.clone(); + assert!(matches!( + result, + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); + + let result = lhs.clone() - rhs.clone(); + assert!(matches!( + result, + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); + + let result = lhs.clone() * rhs.clone(); + assert!(matches!( + result, + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); + + let result = lhs / rhs; + assert!(matches!( + result, + Err(ColumnOperationError::BinaryOperationInvalidColumnType { .. }) + )); } #[test] diff --git a/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs b/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs index 3217ee9dd..72da129f8 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs @@ -111,6 +111,10 @@ impl DataAccessor for OwnedTableTestA Column::VarChar((col, scals)) } OwnedColumn::TimestampTZ(tu, tz, col) => Column::TimestampTZ(*tu, *tz, col), + OwnedColumn::FixedSizeBinary(byte_width, col) => { + let col: &mut [u8] = self.alloc.alloc_slice_copy(col); + Column::FixedSizeBinary(*byte_width, col) + } } } } diff --git a/crates/proof-of-sql/src/base/database/owned_table_utility.rs b/crates/proof-of-sql/src/base/database/owned_table_utility.rs index a008aa235..ef7e4988f 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_utility.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_utility.rs @@ -15,7 +15,7 @@ //! 
``` use super::{OwnedColumn, OwnedTable}; use crate::base::scalar::Scalar; -use alloc::string::String; +use alloc::{string::String, vec::Vec}; use core::ops::Deref; use proof_of_sql_parser::{ posql_time::{PoSQLTimeUnit, PoSQLTimeZone}, @@ -283,3 +283,53 @@ pub fn timestamptz( OwnedColumn::TimestampTZ(time_unit, timezone, data.into_iter().collect()), ) } + +/// Creates a `(Identifier, OwnedColumn)` pair for a fixed-size binary column. +/// This is primarily intended for use in conjunction with [`owned_table`]. +/// +/// # Parameters +/// - `name`: The name of the column. +/// - `byte_width`: The fixed byte width for each binary entry. +/// - `data`: The data for the column, provided as an iterator over byte slices. +/// +/// # Example +/// ``` +/// use proof_of_sql::base::{database::owned_table_utility::*, +/// scalar::Curve25519Scalar, +/// }; +/// +/// let result = owned_table::([ +/// fixed_size_binary("binary_data", 16, vec![ +/// vec![0u8; 16], // Example 16-byte entries +/// vec![1u8; 16], +/// vec![2u8; 16], +/// ]), +/// ]); +/// ``` +/// +/// # Panics +/// - Panics if `name.parse()` fails to convert the name into an `Identifier`. +/// - Panics if any data entry does not match the specified `byte_width`. 
+pub fn fixed_size_binary( + name: impl Deref, + byte_width: usize, + data: impl IntoIterator>, +) -> (Identifier, OwnedColumn) { + let binary_data: Vec = data + .into_iter() + .flat_map(|entry| { + let bytes = entry.as_ref(); + assert_eq!( + bytes.len(), + byte_width, + "Data entry does not match byte width" + ); + bytes.to_vec() + }) + .collect(); + + ( + name.parse().unwrap(), + OwnedColumn::FixedSizeBinary(byte_width as i32, binary_data), + ) +} diff --git a/crates/proof-of-sql/src/base/math/permutation.rs b/crates/proof-of-sql/src/base/math/permutation.rs index f5466e422..f7d62146d 100644 --- a/crates/proof-of-sql/src/base/math/permutation.rs +++ b/crates/proof-of-sql/src/base/math/permutation.rs @@ -72,6 +72,40 @@ impl Permutation { }) } } + + /// Apply the permutation to chunks of the given size within the slice + pub fn try_chunked_apply( + &self, + slice: &[T], + chunk_size: usize, + ) -> Result, PermutationError> + where + T: Clone, + { + if slice.len() % chunk_size != 0 { + return Err(PermutationError::PermutationSizeMismatch { + permutation_size: self.size(), + slice_length: slice.len(), + }); + } + + let num_chunks = slice.len() / chunk_size; + if self.size() != num_chunks { + return Err(PermutationError::PermutationSizeMismatch { + permutation_size: self.size(), + slice_length: num_chunks, + }); + } + + let mut result = Vec::with_capacity(slice.len()); + for &i in &self.permutation { + let start = i * chunk_size; + let end = start + chunk_size; + result.extend_from_slice(&slice[start..end]); + } + + Ok(result) + } } #[cfg(test)] @@ -112,4 +146,26 @@ mod test { }) ); } + + #[test] + fn test_apply_chunked_permutation() { + let slice = vec![ + 0x01, 0x02, 0x03, 0x04, // First chunk + 0x05, 0x06, 0x07, 0x08, // Second chunk + 0x09, 0x0A, 0x0B, 0x0C, // Third chunk + ]; + let chunk_size = 4; + let permutation = Permutation::try_new(vec![2, 0, 1]).unwrap(); + + let permuted = permutation.try_chunked_apply(&slice, chunk_size).unwrap(); + + assert_eq!( + 
permuted, + vec![ + 0x09, 0x0A, 0x0B, 0x0C, // Third chunk + 0x01, 0x02, 0x03, 0x04, // First chunk + 0x05, 0x06, 0x07, 0x08, // Second chunk + ] + ); + } } diff --git a/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs b/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs index 99720707c..9012e959a 100644 --- a/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs +++ b/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs @@ -108,6 +108,7 @@ impl MultilinearExtension for &Column<'_, S> { Column::Int(c) => c.inner_product(evaluation_vec), Column::BigInt(c) | Column::TimestampTZ(_, _, c) => c.inner_product(evaluation_vec), Column::Int128(c) => c.inner_product(evaluation_vec), + Column::FixedSizeBinary(_, _) => unreachable!("FixedSizeBinary is not supported"), } } @@ -122,6 +123,7 @@ impl MultilinearExtension for &Column<'_, S> { Column::Int(c) => c.mul_add(res, multiplier), Column::BigInt(c) | Column::TimestampTZ(_, _, c) => c.mul_add(res, multiplier), Column::Int128(c) => c.mul_add(res, multiplier), + Column::FixedSizeBinary(_, _) => unreachable!("FixedSizeBinary is not supported"), } } @@ -136,6 +138,7 @@ impl MultilinearExtension for &Column<'_, S> { Column::Int(c) => c.to_sumcheck_term(num_vars), Column::BigInt(c) | Column::TimestampTZ(_, _, c) => c.to_sumcheck_term(num_vars), Column::Int128(c) => c.to_sumcheck_term(num_vars), + Column::FixedSizeBinary(_, _) => unreachable!("FixedSizeBinary is not supported"), } } @@ -150,6 +153,7 @@ impl MultilinearExtension for &Column<'_, S> { Column::Int(c) => MultilinearExtension::::id(c), Column::BigInt(c) | Column::TimestampTZ(_, _, c) => MultilinearExtension::::id(c), Column::Int128(c) => MultilinearExtension::::id(c), + Column::FixedSizeBinary(_, _) => unreachable!("FixedSizeBinary is not supported"), } } } diff --git a/crates/proof-of-sql/src/base/scalar/mont_scalar.rs b/crates/proof-of-sql/src/base/scalar/mont_scalar.rs index e471d2afd..40291a16e 100644 --- 
a/crates/proof-of-sql/src/base/scalar/mont_scalar.rs +++ b/crates/proof-of-sql/src/base/scalar/mont_scalar.rs @@ -131,18 +131,30 @@ macro_rules! impl_from_for_mont_scalar_for_type_supported_by_from { }; } -/// Implement `From<&[u8]>` for `MontScalar` impl> From<&[u8]> for MontScalar { + /// Converts a byte slice to a `MontScalar`. + /// + /// - If the byte slice is empty, the result is the zero scalar. + /// - If the byte slice has length 31 or less, the bytes are directly converted to a scalar. + /// - If the byte slice has length 32 or larger, the bytes are hashed using `blake3` and the result is + /// converted to a scalar. fn from(x: &[u8]) -> Self { - if x.is_empty() { - return Self::zero(); + match x.len() { + 0 => Self::zero(), + 1..=31 => { + // Convert directly if 31 bytes or less + let mut bytes = [0u8; 32]; + bytes[..x.len()].copy_from_slice(x); + Self::from_le_bytes_mod_order(&bytes) + } + _ => { + // Hash and convert if 32 bytes or more + let hash = blake3::hash(x); + let mut bytes: [u8; 32] = hash.into(); + bytes[31] &= 0b0000_1111_u8; + Self::from_le_bytes_mod_order(&bytes) + } } - - let hash = blake3::hash(x); - let mut bytes: [u8; 32] = hash.into(); - bytes[31] &= 0b0000_1111_u8; - - Self::from_le_bytes_mod_order(&bytes) } } @@ -151,7 +163,16 @@ macro_rules! 
impl_from_for_mont_scalar_for_string { ($tt:ty) => { impl> From<$tt> for MontScalar { fn from(x: $tt) -> Self { - x.as_bytes().into() + let bytes = x.as_bytes(); + if bytes.is_empty() { + return Self::zero(); + } + + let hash = blake3::hash(bytes); + let mut bytes: [u8; 32] = hash.into(); + bytes[31] &= 0b0000_1111_u8; + + Self::from_le_bytes_mod_order(&bytes) } } }; diff --git a/crates/proof-of-sql/src/base/scalar/scalar.rs b/crates/proof-of-sql/src/base/scalar/scalar.rs index eb4104ab1..7b84ed044 100644 --- a/crates/proof-of-sql/src/base/scalar/scalar.rs +++ b/crates/proof-of-sql/src/base/scalar/scalar.rs @@ -49,6 +49,7 @@ pub trait Scalar: + core::ops::SubAssign + RefInto<[u64; 4]> + for<'a> core::convert::From<&'a String> + + for<'a> core::convert::From<&'a [u8]> + VarInt + core::convert::From + core::convert::From diff --git a/crates/proof-of-sql/src/proof_primitive/dory/blitzar_metadata_table.rs b/crates/proof-of-sql/src/proof_primitive/dory/blitzar_metadata_table.rs index 4c24b8d15..b668f1540 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/blitzar_metadata_table.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/blitzar_metadata_table.rs @@ -38,7 +38,8 @@ pub const fn min_as_f(column_type: ColumnType) -> F { ColumnType::Decimal75(_, _) | ColumnType::Scalar | ColumnType::VarChar - | ColumnType::Boolean => MontFp!("0"), + | ColumnType::Boolean + | ColumnType::FixedSizeBinary(_) => MontFp!("0"), } } @@ -129,6 +130,7 @@ fn copy_column_data_to_slice( | CommittableColumn::VarChar(column) => { scalar_row_slice[start..end].copy_from_slice(&column[index].offset_to_bytes()); } + CommittableColumn::FixedSizeBinary(_, _) => todo!(), CommittableColumn::RangeCheckWord(_) => todo!(), } } diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs index 611e84f1b..f31df7808 100644 --- 
a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs @@ -76,6 +76,10 @@ fn compute_dory_commitment( CommittableColumn::RangeCheckWord(column) => { compute_dory_commitment_impl(column, offset, setup) } + CommittableColumn::FixedSizeBinary(byte_size, column) => { + let scalars: Vec<_> = column.chunks_exact(*byte_size as usize).collect(); + compute_dory_commitment_impl(&scalars, offset, setup) + } } } diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_compute_commitments_test.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_compute_commitments_test.rs index 7e15110c7..f954397b5 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dory_compute_commitments_test.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dory_compute_commitments_test.rs @@ -41,6 +41,43 @@ fn we_can_compute_a_dory_commitment_with_boolean_values() { assert_eq!(res[0].0, expected); } +#[test] +fn we_can_compute_a_dory_commitment_with_fixed_size_binary_values() { + // Define the byte width for the FixedSizeBinary elements + let byte_width = 4; + + // Create a column with FixedSizeBinary data + let column = vec![ + 0x01, 0x02, 0x03, 0x04, // First element + 0x05, 0x06, 0x07, 0x08, // Second element + 0x09, 0x0A, 0x0B, 0x0C, // Third element + ]; + + // Initialize public parameters and setup + let public_parameters = PublicParameters::test_rand(5, &mut test_rng()); + let prover_setup = ProverSetup::from(&public_parameters); + let setup = DoryProverPublicSetup::new(&prover_setup, 2); + + // Compute the Dory commitments + let res = compute_dory_commitments( + &[CommittableColumn::FixedSizeBinary(byte_width, &column)], + 0, + &setup, + ); + + // Extract the Gamma values + let Gamma_1 = public_parameters.Gamma_1; + let Gamma_2 = public_parameters.Gamma_2; + + // Calculate the expected result + let expected: GT = Pairing::pairing(Gamma_1[0], Gamma_2[0]) * F::from(0x0102_0304) + 
+ Pairing::pairing(Gamma_1[1], Gamma_2[0]) * F::from(0x0506_0708) + + Pairing::pairing(Gamma_1[2], Gamma_2[0]) * F::from(0x090A_0B0C); + + // Assert that the computed result matches the expected result + assert_eq!(res[0].0, expected); +} + #[test] fn we_can_compute_a_dory_commitment_with_only_one_row() { let public_parameters = PublicParameters::test_rand(5, &mut test_rng()); diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs index b36e1177d..1614c5129 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs @@ -88,6 +88,10 @@ fn compute_dory_commitment( CommittableColumn::RangeCheckWord(column) => { compute_dory_commitment_impl(column, offset, setup) } + CommittableColumn::FixedSizeBinary(byte_width, column) => { + let scalars: Vec<_> = column.chunks_exact(*byte_width as usize).collect_vec(); + compute_dory_commitment_impl(&scalars, offset, setup) + } } } diff --git a/crates/proof-of-sql/src/proof_primitive/dory/pack_scalars.rs b/crates/proof-of-sql/src/proof_primitive/dory/pack_scalars.rs index a62c5c67b..cce9fcd89 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/pack_scalars.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/pack_scalars.rs @@ -437,7 +437,8 @@ pub fn bit_table_and_scalars_for_packed_msm( num_matrix_commitment_columns, ); } - CommittableColumn::RangeCheckWord(column) => { + CommittableColumn::RangeCheckWord(column) + | CommittableColumn::FixedSizeBinary(_, column) => { pack_bit( column, &mut packed_scalars, diff --git a/crates/proof-of-sql/src/sql/proof/provable_query_result.rs b/crates/proof-of-sql/src/sql/proof/provable_query_result.rs index b45d1dd18..17f38706b 100644 --- a/crates/proof-of-sql/src/sql/proof/provable_query_result.rs +++ 
b/crates/proof-of-sql/src/sql/proof/provable_query_result.rs @@ -124,6 +124,9 @@ impl ProvableQueryResult { ColumnType::TimestampTZ(_, _) => { decode_and_convert::(&self.data[offset..]) } + ColumnType::FixedSizeBinary(byte_width) => decode_and_convert::<&[u8], S>( + &self.data[offset..offset + byte_width as usize], + ), }?; val += *entry * x; offset += sz; @@ -209,6 +212,11 @@ impl ProvableQueryResult { offset += num_read; Ok((field.name(), OwnedColumn::TimestampTZ(tu, tz, col))) } + ColumnType::FixedSizeBinary(byte_width) => { + let (col, num_read) = decode_multiple_elements(&self.data[offset..], n)?; + offset += num_read * byte_width as usize; + Ok((field.name(), OwnedColumn::FixedSizeBinary(byte_width, col))) + } }) .collect::>()?, )?; diff --git a/crates/proof-of-sql/src/sql/proof/provable_result_column.rs b/crates/proof-of-sql/src/sql/proof/provable_result_column.rs index 1896fceec..72dba7fd4 100644 --- a/crates/proof-of-sql/src/sql/proof/provable_result_column.rs +++ b/crates/proof-of-sql/src/sql/proof/provable_result_column.rs @@ -39,6 +39,7 @@ impl ProvableResultColumn for Column<'_, S> { Column::Int128(col) => col.num_bytes(length), Column::Decimal75(_, _, col) | Column::Scalar(col) => col.num_bytes(length), Column::VarChar((col, _)) => col.num_bytes(length), + Column::FixedSizeBinary(_, col) => col.num_bytes(length), } } @@ -52,6 +53,7 @@ impl ProvableResultColumn for Column<'_, S> { Column::Int128(col) => col.write(out, length), Column::Decimal75(_, _, col) | Column::Scalar(col) => col.write(out, length), Column::VarChar((col, _)) => col.write(out, length), + Column::FixedSizeBinary(_, col) => col.write(out, length), } } } diff --git a/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs b/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs index f65ef23b4..e5ea41d71 100644 --- a/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs @@ -170,6 +170,9 @@ fn 
make_empty_query_result(result_fields: &[ColumnField]) -> QueryRes ColumnType::Scalar => OwnedColumn::Scalar(vec![]), ColumnType::VarChar => OwnedColumn::VarChar(vec![]), ColumnType::TimestampTZ(tu, tz) => OwnedColumn::TimestampTZ(tu, tz, vec![]), + ColumnType::FixedSizeBinary(byte_width) => { + OwnedColumn::FixedSizeBinary(byte_width, vec![]) + } }, ) }) diff --git a/docs/SQLSyntaxSpecification.md b/docs/SQLSyntaxSpecification.md index 346c88057..197392c45 100644 --- a/docs/SQLSyntaxSpecification.md +++ b/docs/SQLSyntaxSpecification.md @@ -25,6 +25,8 @@ FROM table * Varchar [^1] - Date / Time Types * Timestamp + - Binary Types + * FixedSizeBinary * Operators - Logical Operators * AND, OR From efd18a0cc9057740cbd33cb1cad396f9bfed2223 Mon Sep 17 00:00:00 2001 From: Tarek Date: Mon, 4 Nov 2024 19:16:35 +0200 Subject: [PATCH 2/2] fix: use `Sequence::from_raw_parts_with_size` for `FixedSizeBinary` Signed-off-by: Tarek --- Cargo.toml | 2 +- .../src/base/commitment/committable_column.rs | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 035636f51..e38299462 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,7 +25,7 @@ arrow-csv = { version = "51.0.0" } bit-iter = { version = "1.1.1" } bigdecimal = { version = "0.4.5", default-features = false, features = ["serde"] } blake3 = { version = "1.3.3", default-features = false } -blitzar = { version = "3.4.0" } +blitzar = { version = "3.5.0" } bumpalo = { version = "3.11.0" } bytemuck = {version = "1.16.3", features = ["derive"]} byte-slice-cast = { version = "1.2.1", default-features = false } diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index 50db54e91..1c5d2f074 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -236,8 +236,9 @@ impl<'a, 'b> From<&'a CommittableColumn<'b>> for 
Sequence<'a> { CommittableColumn::Boolean(bools) => Sequence::from(*bools), CommittableColumn::TimestampTZ(_, _, times) => Sequence::from(*times), CommittableColumn::RangeCheckWord(words) => Sequence::from(*words), - // FIXME: Is this the correct way to convert a FixedSizeBinary column to a Sequence? - CommittableColumn::FixedSizeBinary(_, bytes) => Sequence::from(*bytes), + CommittableColumn::FixedSizeBinary(byte_size, bytes) => { + Sequence::from_raw_parts_with_size(bytes, *byte_size as usize, false) + } } } } @@ -1090,7 +1091,11 @@ mod tests { let committable_column = CommittableColumn::FixedSizeBinary(byte_width, &concatenated_data); let sequence_actual = Sequence::from(&committable_column); - let sequence_expected = Sequence::from(concatenated_data.as_slice()); + let sequence_expected = Sequence::from_raw_parts_with_size( + concatenated_data.as_slice(), + byte_width as usize, + false, + ); let mut commitment_buffer = [CompressedRistretto::default(); 2]; compute_curve25519_commitments( &mut commitment_buffer,