From 36ee587401eda65c7d8dd8085d44fceb65f84b00 Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Wed, 8 Jan 2025 12:02:51 -0500 Subject: [PATCH 01/11] Page: track number of allocated var-len granules Definition of `Page::bytes_used_by_rows` to follow. This change seemed to stand on its own enough to deserve a separate commit. --- crates/table/src/page.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/crates/table/src/page.rs b/crates/table/src/page.rs index c1084f8fcc9..082c582a31e 100644 --- a/crates/table/src/page.rs +++ b/crates/table/src/page.rs @@ -248,6 +248,12 @@ struct VarHeader { /// pre-decrement this index. // TODO(perf,future-work): determine how to "lower" the high water mark when freeing the "top"-most granule. first: PageOffset, + + /// The number of granules currently used by rows within this page. + /// + /// [`Page::bytes_used_by_rows`] needs this information. + /// Stored here because otherwise counting it would require traversing all the present rows. + num_granules: u16, } impl MemoryUsage for VarHeader { @@ -256,12 +262,13 @@ impl MemoryUsage for VarHeader { next_free, freelist_len, first, + num_granules, } = self; - next_free.heap_usage() + freelist_len.heap_usage() + first.heap_usage() + next_free.heap_usage() + freelist_len.heap_usage() + first.heap_usage() + num_granules.heap_usage() } } -static_assert_size!(VarHeader, 6); +static_assert_size!(VarHeader, 8); impl Default for VarHeader { fn default() -> Self { @@ -269,6 +276,7 @@ impl Default for VarHeader { next_free: FreeCellRef::NIL, freelist_len: 0, first: PageOffset::PAGE_END, + num_granules: 0, } } } @@ -771,6 +779,8 @@ impl<'page> VarView<'page> { granule, ); + self.header.num_granules += 1; + Ok(granule) } @@ -812,6 +822,7 @@ impl<'page> VarView<'page> { // but we want to return a whole "run" of sequential freed chunks, // which requries some bookkeeping (or an O(> n) linked list traversal). 
self.header.freelist_len += 1; + self.header.num_granules -= 1; let adjuster = self.adjuster(); // SAFETY: Per caller contract, `offset` is a valid `VarLenGranule`, @@ -1112,10 +1123,19 @@ impl Page { } /// Returns the number of rows stored in this page. + /// + /// This method runs in constant time. pub fn num_rows(&self) -> usize { self.header.fixed.num_rows as usize } + /// Returns the number of var-len granules allocated in this page. + /// + /// This method runs in constant time. + pub fn num_var_len_granules(&self) -> usize { + self.header.var.num_granules as usize + } + /// Returns the range of row data starting at `offset` and lasting `size` bytes. pub fn get_row_data(&self, row: PageOffset, size: Size) -> &Bytes { &self.row_data[row.range(size)] From ef8d40e28c28a038202895fa59e76caf97909f6a Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Wed, 8 Jan 2025 12:05:04 -0500 Subject: [PATCH 02/11] `Table::num_rows` and `Table::bytes_used_by_rows` We intend to bill based on these predictable metrics, rather than the somewhat-unpredictable actual heap memory usage of the system. As such, we need a way to compute them (duh). This commit adds `Table` methods for computing the number of resident rows, and the number of bytes stored by those rows. --- crates/table/src/page.rs | 19 +++++++++++++++++++ crates/table/src/table.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/crates/table/src/page.rs b/crates/table/src/page.rs index 082c582a31e..17dd24c97b5 100644 --- a/crates/table/src/page.rs +++ b/crates/table/src/page.rs @@ -1136,6 +1136,25 @@ impl Page { self.header.var.num_granules as usize } + /// Returns the number of bytes used by rows stored in this page. + /// + /// This is necessarily an overestimate of live data bytes, as it includes: + /// - Padding bytes within the fixed-length portion of the rows. + /// - [`VarLenRef`] pointer-like portions of rows. + /// - Unused trailing parts of partially-filled [`VarLenGranule`]s. 
+ /// - [`VarLenGranule`]s used to store [`BlobHash`]es. + /// + /// Note that large blobs themselves are not counted. + /// The caller should obtain a count of the bytes used by large blobs + /// from the [`super::blob_store::BlobStore`]. + /// + /// This method runs in constant time. + pub fn bytes_used_by_rows(&self, fixed_row_size: Size) -> usize { + let fixed_row_bytes = self.num_rows() * fixed_row_size.len(); + let var_len_bytes = self.num_var_len_granules() * VarLenGranule::SIZE.len(); + fixed_row_bytes + var_len_bytes + } + /// Returns the range of row data starting at `offset` and lasting `size` bytes. pub fn get_row_data(&self, row: PageOffset, size: Size) -> &Bytes { &self.row_data[row.range(size)] diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index 1ba60f3c8a4..f8e42dc8466 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -915,6 +915,35 @@ impl Table { self.compute_row_count(blob_store); self.rebuild_pointer_map(blob_store); } + + /// Returns the number of rows resident in this table. + /// + /// This scales in runtime with the number of pages in the table. + pub fn num_rows(&self) -> u64 { + self.pages().iter().map(|page| page.num_rows() as u64).sum() + } + + /// Returns the number of bytes used by rows resident in this table. + /// + /// This includes data bytes, padding bytes and some overhead bytes, + /// as described in the docs for [`Page::bytes_used_by_rows`], + /// but *does not* include: + /// + /// - Unallocated space within pages. + /// - Per-page overhead (e.g. page headers). + /// - Table overhead (e.g. the [`RowTypeLayout`], [`PointerMap`], [`Schema`] &c). + /// - Indices. + // TODO(energy): count memory usage by indices. + /// - Large blobs in the [`BlobStore`]. + /// + /// Of these, the caller should inspect the blob store in order to account for memory usage by large blobs, + /// but we intend to eat all the other overheads when billing. 
+ pub fn bytes_used_by_rows(&self) -> u64 { + self.pages() + .iter() + .map(|page| page.bytes_used_by_rows(self.inner.row_layout.size()) as u64) + .sum() + } } /// A reference to a single row within a table. From 66a4270c3c559612e7de2113990c2cf159ee918d Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Tue, 14 Jan 2025 12:40:34 -0500 Subject: [PATCH 03/11] Operator to compute index data size Per out-of-band discussion, I am not sure this computation will actually be useful to us, but it is the thing I can compute at this time. See comment on `BTreeIndex::num_key_bytes` in btree_index.rs for the specific counting implemented here. --- crates/table/src/btree_index.rs | 280 ++++++++++++++++++++++++++++---- 1 file changed, 249 insertions(+), 31 deletions(-) diff --git a/crates/table/src/btree_index.rs b/crates/table/src/btree_index.rs index 6635370aa61..9df558358ae 100644 --- a/crates/table/src/btree_index.rs +++ b/crates/table/src/btree_index.rs @@ -25,9 +25,11 @@ use super::indexes::RowPointer; use super::table::RowRef; use crate::{read_column::ReadColumn, static_assert_size, MemoryUsage}; use core::ops::RangeBounds; +use spacetimedb_lib::ProductValue; use spacetimedb_primitives::{ColList, IndexId}; use spacetimedb_sats::{ - algebraic_value::Packed, i256, product_value::InvalidFieldError, u256, AlgebraicType, AlgebraicValue, ProductType, + algebraic_value::Packed, i256, product_value::InvalidFieldError, u256, AlgebraicType, AlgebraicValue, ArrayValue, + ProductType, SumValue, }; mod multimap; @@ -321,29 +323,44 @@ impl TypedIndex { /// or may insert a nonsense value into the index. /// Note, however, that it will not invoke undefined behavior. /// - /// Returns `Ok(Some(existing_row))` if this index was a unique index that was violated. - /// The index is not inserted to in that case. - fn insert(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result, InvalidFieldError> { - fn mm_insert_at_type( + /// The returned `usize` is the number of bytes used by the key. 
+ /// [`BTreeIndex::check_and_insert`] will use this + /// to update the counter for [`BTreeIndex::num_key_bytes`]. + /// We want to store said counter outside of the [`TypedIndex`] enum, + /// but we can only compute the size using type info within the [`TypedIndex`], + /// so we have to return the size across this boundary. + /// + /// Returns `Ok((Some(existing_row), key_size))` if this index was a unique index that was violated. + /// The new entry is not inserted to in that case. + /// + /// Returns `Ok((None, key_size))` if the new entry was successfully inserted into the index. + fn insert( + &mut self, + cols: &ColList, + row_ref: RowRef<'_>, + ) -> Result<(Option, usize), InvalidFieldError> { + fn mm_insert_at_type( this: &mut Index, cols: &ColList, row_ref: RowRef<'_>, - ) -> Result, InvalidFieldError> { + ) -> Result<(Option, usize), InvalidFieldError> { let col_pos = cols.as_singleton().unwrap(); - let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; + let key: T = row_ref.read_col(col_pos).map_err(|_| col_pos)?; + let key_size = key.key_size_in_bytes(); this.insert(key, row_ref.pointer()); - Ok(None) + Ok((None, key_size)) } - fn um_insert_at_type( + fn um_insert_at_type( this: &mut UniqueIndex, cols: &ColList, row_ref: RowRef<'_>, - ) -> Result, InvalidFieldError> { + ) -> Result<(Option, usize), InvalidFieldError> { let col_pos = cols.as_singleton().unwrap(); - let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - Ok(this.insert(key, row_ref.pointer()).copied()) + let key: T = row_ref.read_col(col_pos).map_err(|_| col_pos)?; + let key_size = key.key_size_in_bytes(); + Ok((this.insert(key, row_ref.pointer()).copied(), key_size)) } - let unique_violation = match self { + let (unique_violation, key_size) = match self { Self::Bool(idx) => mm_insert_at_type(idx, cols, row_ref), Self::U8(idx) => mm_insert_at_type(idx, cols, row_ref), Self::I8(idx) => mm_insert_at_type(idx, cols, row_ref), @@ -360,8 +377,9 @@ impl TypedIndex { Self::String(idx) 
=> mm_insert_at_type(idx, cols, row_ref), Self::AV(this) => { let key = row_ref.project(cols)?; + let key_size = key.key_size_in_bytes(); this.insert(key, row_ref.pointer()); - Ok(None) + Ok((None, key_size)) } Self::UniqueBool(idx) => um_insert_at_type(idx, cols, row_ref), Self::UniqueU8(idx) => um_insert_at_type(idx, cols, row_ref), @@ -379,10 +397,11 @@ impl TypedIndex { Self::UniqueString(idx) => um_insert_at_type(idx, cols, row_ref), Self::UniqueAV(this) => { let key = row_ref.project(cols)?; - Ok(this.insert(key, row_ref.pointer()).copied()) + let key_size = key.key_size_in_bytes(); + Ok((this.insert(key, row_ref.pointer()).copied(), key_size)) } }?; - Ok(unique_violation) + Ok((unique_violation, key_size)) } /// Remove the row referred to by `row_ref` from the index `self`, @@ -393,24 +412,34 @@ impl TypedIndex { /// this will behave oddly; it may return an error, do nothing, /// or remove the wrong value from the index. /// Note, however, that it will not invoke undefined behavior. - fn delete(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result { - fn mm_delete_at_type( + /// + /// If the row was present and has been deleted, returns `Ok(Some(key_size_in_bytes))`, + /// where `key_size_in_bytes` is the size of the key. + /// [`BTreeIndex::delete`] will use this + /// to update the counter for [`BTreeIndex::num_key_bytes`]. + /// We want to store said counter outside of the [`TypedIndex`] enum, + /// but we can only compute the size using type info within the [`TypedIndex`], + /// so we have to return the size across this boundary. 
+ fn delete(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result, InvalidFieldError> { + fn mm_delete_at_type( this: &mut Index, cols: &ColList, row_ref: RowRef<'_>, - ) -> Result { + ) -> Result, InvalidFieldError> { let col_pos = cols.as_singleton().unwrap(); - let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - Ok(this.delete(&key, &row_ref.pointer())) + let key: T = row_ref.read_col(col_pos).map_err(|_| col_pos)?; + let key_size = key.key_size_in_bytes(); + Ok(this.delete(&key, &row_ref.pointer()).then_some(key_size)) } - fn um_delete_at_type( + fn um_delete_at_type( this: &mut UniqueIndex, cols: &ColList, row_ref: RowRef<'_>, - ) -> Result { + ) -> Result, InvalidFieldError> { let col_pos = cols.as_singleton().unwrap(); - let key = row_ref.read_col(col_pos).map_err(|_| col_pos)?; - Ok(this.delete(&key)) + let key: T = row_ref.read_col(col_pos).map_err(|_| col_pos)?; + let key_size = key.key_size_in_bytes(); + Ok(this.delete(&key).then_some(key_size)) } match self { @@ -430,7 +459,8 @@ impl TypedIndex { Self::String(this) => mm_delete_at_type(this, cols, row_ref), Self::AV(this) => { let key = row_ref.project(cols)?; - Ok(this.delete(&key, &row_ref.pointer())) + let key_size = key.key_size_in_bytes(); + Ok(this.delete(&key, &row_ref.pointer()).then_some(key_size)) } Self::UniqueBool(this) => um_delete_at_type(this, cols, row_ref), Self::UniqueU8(this) => um_delete_at_type(this, cols, row_ref), @@ -448,7 +478,8 @@ impl TypedIndex { Self::UniqueString(this) => um_delete_at_type(this, cols, row_ref), Self::UniqueAV(this) => { let key = row_ref.project(cols)?; - Ok(this.delete(&key)) + let key_size = key.key_size_in_bytes(); + Ok(this.delete(&key).then_some(key_size)) } } } @@ -626,6 +657,124 @@ impl TypedIndex { } } +trait KeySize { + fn key_size_in_bytes(&self) -> usize; +} + +macro_rules! 
impl_key_size_primitive { + ($prim:ty) => { + impl KeySize for $prim { + fn key_size_in_bytes(&self) -> usize { std::mem::size_of::() } + } + }; + ($($prim:ty,)*) => { + $(impl_key_size_primitive!($prim);)* + }; +} + +impl_key_size_primitive!( + bool, + u8, + i8, + u16, + i16, + u32, + i32, + u64, + i64, + u128, + i128, + spacetimedb_sats::algebraic_value::Packed, + spacetimedb_sats::algebraic_value::Packed, + u256, + i256, + spacetimedb_sats::F32, + spacetimedb_sats::F64, +); + +impl KeySize for Box { + fn key_size_in_bytes(&self) -> usize { + self.len() + std::mem::size_of::() + } +} + +impl KeySize for AlgebraicValue { + fn key_size_in_bytes(&self) -> usize { + match self { + AlgebraicValue::Bool(x) => x.key_size_in_bytes(), + AlgebraicValue::U8(x) => x.key_size_in_bytes(), + AlgebraicValue::I8(x) => x.key_size_in_bytes(), + AlgebraicValue::U16(x) => x.key_size_in_bytes(), + AlgebraicValue::I16(x) => x.key_size_in_bytes(), + AlgebraicValue::U32(x) => x.key_size_in_bytes(), + AlgebraicValue::I32(x) => x.key_size_in_bytes(), + AlgebraicValue::U64(x) => x.key_size_in_bytes(), + AlgebraicValue::I64(x) => x.key_size_in_bytes(), + AlgebraicValue::U128(x) => x.key_size_in_bytes(), + AlgebraicValue::I128(x) => x.key_size_in_bytes(), + AlgebraicValue::U256(x) => x.key_size_in_bytes(), + AlgebraicValue::I256(x) => x.key_size_in_bytes(), + AlgebraicValue::F32(x) => x.key_size_in_bytes(), + AlgebraicValue::F64(x) => x.key_size_in_bytes(), + AlgebraicValue::String(x) => x.key_size_in_bytes(), + AlgebraicValue::Sum(x) => x.key_size_in_bytes(), + AlgebraicValue::Product(x) => x.key_size_in_bytes(), + AlgebraicValue::Array(x) => x.key_size_in_bytes(), + + AlgebraicValue::Min | AlgebraicValue::Max => unreachable!(), + } + } +} + +impl KeySize for SumValue { + fn key_size_in_bytes(&self) -> usize { + 1 + self.value.key_size_in_bytes() + } +} + +impl KeySize for ProductValue { + fn key_size_in_bytes(&self) -> usize { + self.elements.key_size_in_bytes() + } +} + +impl KeySize for 
[K] +where + K: KeySize, +{ + // TODO(perf, bikeshedding): check that this optimized to `size_of::() * self.len()` + // when `K` is a primitive. + fn key_size_in_bytes(&self) -> usize { + self.iter().map(|elt| elt.key_size_in_bytes()).sum() + } +} + +impl KeySize for ArrayValue { + fn key_size_in_bytes(&self) -> usize { + match self { + ArrayValue::Sum(elts) => elts.key_size_in_bytes(), + ArrayValue::Product(elts) => elts.key_size_in_bytes(), + ArrayValue::Bool(elts) => elts.key_size_in_bytes(), + ArrayValue::I8(elts) => elts.key_size_in_bytes(), + ArrayValue::U8(elts) => elts.key_size_in_bytes(), + ArrayValue::I16(elts) => elts.key_size_in_bytes(), + ArrayValue::U16(elts) => elts.key_size_in_bytes(), + ArrayValue::I32(elts) => elts.key_size_in_bytes(), + ArrayValue::U32(elts) => elts.key_size_in_bytes(), + ArrayValue::I64(elts) => elts.key_size_in_bytes(), + ArrayValue::U64(elts) => elts.key_size_in_bytes(), + ArrayValue::I128(elts) => elts.key_size_in_bytes(), + ArrayValue::U128(elts) => elts.key_size_in_bytes(), + ArrayValue::I256(elts) => elts.key_size_in_bytes(), + ArrayValue::U256(elts) => elts.key_size_in_bytes(), + ArrayValue::F32(elts) => elts.key_size_in_bytes(), + ArrayValue::F64(elts) => elts.key_size_in_bytes(), + ArrayValue::String(elts) => elts.key_size_in_bytes(), + ArrayValue::Array(elts) => elts.key_size_in_bytes(), + } + } +} + /// A B-Tree based index on a set of [`ColId`]s of a table. #[derive(Debug, PartialEq, Eq)] pub struct BTreeIndex { @@ -635,7 +784,19 @@ pub struct BTreeIndex { idx: TypedIndex, /// The key type of this index. /// This is the projection of the row type to the types of the columns indexed. + // TODO(perf, bikeshedding): Could trim `sizeof(BTreeIndex)` to 64 if this was `Box`. pub key_type: AlgebraicType, + + /// The number of rows in this index. + /// + /// Memoized counter for [`Self::num_rows`]. + num_rows: u64, + + /// The number of key bytes in this index. + /// + /// Memoized counter for [`Self::num_key_bytes`]. 
+ /// See that method for more detailed documentation. + num_key_bytes: u64, } impl MemoryUsage for BTreeIndex { @@ -644,12 +805,18 @@ impl MemoryUsage for BTreeIndex { index_id, idx, key_type, + num_rows, + num_key_bytes, } = self; - index_id.heap_usage() + idx.heap_usage() + key_type.heap_usage() + index_id.heap_usage() + + idx.heap_usage() + + key_type.heap_usage() + + num_rows.heap_usage() + + num_key_bytes.heap_usage() } } -static_assert_size!(BTreeIndex, 64); +static_assert_size!(BTreeIndex, 80); impl BTreeIndex { /// Returns a new possibly unique index, with `index_id` for a set of columns. @@ -665,6 +832,8 @@ impl BTreeIndex { index_id, idx: typed_index, key_type, + num_rows: 0, + num_key_bytes: 0, }) } @@ -678,6 +847,8 @@ impl BTreeIndex { index_id, idx, key_type, + num_rows: 0, + num_key_bytes: 0, } } @@ -695,14 +866,30 @@ impl BTreeIndex { cols: &ColList, row_ref: RowRef<'_>, ) -> Result, InvalidFieldError> { - self.idx.insert(cols, row_ref) + let (res, size_in_bytes) = self.idx.insert(cols, row_ref)?; + if res.is_none() { + // No existing row; the new row was inserted. + // Update the `num_rows` and `num_key_bytes` counters + // to account for the new insertion. + self.num_rows += 1; + self.num_key_bytes += size_in_bytes as u64; + } + Ok(res) } /// Deletes `ptr` with its indexed value `col_value` from this index. /// /// Returns whether `ptr` was present. pub fn delete(&mut self, cols: &ColList, row_ref: RowRef<'_>) -> Result { - self.idx.delete(cols, row_ref) + if let Some(size_in_bytes) = self.idx.delete(cols, row_ref)? { + // Was present, and deleted: update the `num_rows` and `num_key_bytes` counters. + self.num_rows -= 1; + self.num_key_bytes -= size_in_bytes as u64; + Ok(true) + } else { + // Was not present: don't update counters. + Ok(false) + } } /// Returns whether `value` is in this index. @@ -741,12 +928,43 @@ impl BTreeIndex { /// rather than constructing a new `BTreeIndex`. 
pub fn clear(&mut self) { self.idx.clear(); + self.num_key_bytes = 0; + self.num_rows = 0; } /// The number of unique keys in this index. pub fn num_keys(&self) -> usize { self.idx.num_keys() } + + /// The number of rows stored in this index. + /// + /// Note that, for non-unique indexes, this may be larger than [`Self::num_keys`]. + /// + /// This method runs in constant time. + pub fn num_rows(&self) -> u64 { + self.num_rows + } + + /// The number of bytes stored in keys in this index. + /// + /// For non-unique indexes, duplicate keys are counted once for each row that refers to them, + /// even though the internal storage may deduplicate them as an optimization. + /// + /// This method runs in constant time. + /// + /// The key bytes of a value are defined depending on that value's type: + /// - Integer, float and boolean values take key bytes according to their [`std::mem::size_of`]. + /// - Strings take key bytes equal to their length in bytes. + /// No overhead is counted, unlike in the BFLATN or BSATN size. + /// - Sum values take 1 key byte for the tag, plus the key bytes of their active payload. + /// Inactive variants and padding are not counted, unlike in the BFLATN size. + /// - Product values take key bytes equal to the sum of their elements' key bytes. + /// Padding is not counted, unlike in the BFLATN size. + /// - Array values take key bytes equal to the sum of their elements' key bytes. 
+ pub fn num_key_bytes(&self) -> u64 { + self.num_key_bytes + } } #[cfg(test)] From d49dd8084232224bbb31fcfcab4091427b0bd42f Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Wed, 15 Jan 2025 09:34:11 -0500 Subject: [PATCH 04/11] Move `KeySize` to its own file; export and document it --- crates/table/src/btree_index.rs | 135 +-------------------- crates/table/src/btree_index/key_size.rs | 143 +++++++++++++++++++++++ 2 files changed, 148 insertions(+), 130 deletions(-) create mode 100644 crates/table/src/btree_index/key_size.rs diff --git a/crates/table/src/btree_index.rs b/crates/table/src/btree_index.rs index 9df558358ae..76ad37de312 100644 --- a/crates/table/src/btree_index.rs +++ b/crates/table/src/btree_index.rs @@ -25,16 +25,17 @@ use super::indexes::RowPointer; use super::table::RowRef; use crate::{read_column::ReadColumn, static_assert_size, MemoryUsage}; use core::ops::RangeBounds; -use spacetimedb_lib::ProductValue; use spacetimedb_primitives::{ColList, IndexId}; use spacetimedb_sats::{ - algebraic_value::Packed, i256, product_value::InvalidFieldError, u256, AlgebraicType, AlgebraicValue, ArrayValue, - ProductType, SumValue, + algebraic_value::Packed, i256, product_value::InvalidFieldError, u256, AlgebraicType, AlgebraicValue, ProductType, }; +mod key_size; mod multimap; mod uniquemap; +pub use key_size::KeySize; + type Index = multimap::MultiMap; type IndexIter<'a, K> = multimap::MultiMapRangeIter<'a, K, RowPointer>; type UniqueIndex = uniquemap::UniqueMap; @@ -657,124 +658,6 @@ impl TypedIndex { } } -trait KeySize { - fn key_size_in_bytes(&self) -> usize; -} - -macro_rules! 
impl_key_size_primitive { - ($prim:ty) => { - impl KeySize for $prim { - fn key_size_in_bytes(&self) -> usize { std::mem::size_of::() } - } - }; - ($($prim:ty,)*) => { - $(impl_key_size_primitive!($prim);)* - }; -} - -impl_key_size_primitive!( - bool, - u8, - i8, - u16, - i16, - u32, - i32, - u64, - i64, - u128, - i128, - spacetimedb_sats::algebraic_value::Packed, - spacetimedb_sats::algebraic_value::Packed, - u256, - i256, - spacetimedb_sats::F32, - spacetimedb_sats::F64, -); - -impl KeySize for Box { - fn key_size_in_bytes(&self) -> usize { - self.len() + std::mem::size_of::() - } -} - -impl KeySize for AlgebraicValue { - fn key_size_in_bytes(&self) -> usize { - match self { - AlgebraicValue::Bool(x) => x.key_size_in_bytes(), - AlgebraicValue::U8(x) => x.key_size_in_bytes(), - AlgebraicValue::I8(x) => x.key_size_in_bytes(), - AlgebraicValue::U16(x) => x.key_size_in_bytes(), - AlgebraicValue::I16(x) => x.key_size_in_bytes(), - AlgebraicValue::U32(x) => x.key_size_in_bytes(), - AlgebraicValue::I32(x) => x.key_size_in_bytes(), - AlgebraicValue::U64(x) => x.key_size_in_bytes(), - AlgebraicValue::I64(x) => x.key_size_in_bytes(), - AlgebraicValue::U128(x) => x.key_size_in_bytes(), - AlgebraicValue::I128(x) => x.key_size_in_bytes(), - AlgebraicValue::U256(x) => x.key_size_in_bytes(), - AlgebraicValue::I256(x) => x.key_size_in_bytes(), - AlgebraicValue::F32(x) => x.key_size_in_bytes(), - AlgebraicValue::F64(x) => x.key_size_in_bytes(), - AlgebraicValue::String(x) => x.key_size_in_bytes(), - AlgebraicValue::Sum(x) => x.key_size_in_bytes(), - AlgebraicValue::Product(x) => x.key_size_in_bytes(), - AlgebraicValue::Array(x) => x.key_size_in_bytes(), - - AlgebraicValue::Min | AlgebraicValue::Max => unreachable!(), - } - } -} - -impl KeySize for SumValue { - fn key_size_in_bytes(&self) -> usize { - 1 + self.value.key_size_in_bytes() - } -} - -impl KeySize for ProductValue { - fn key_size_in_bytes(&self) -> usize { - self.elements.key_size_in_bytes() - } -} - -impl KeySize for 
[K] -where - K: KeySize, -{ - // TODO(perf, bikeshedding): check that this optimized to `size_of::() * self.len()` - // when `K` is a primitive. - fn key_size_in_bytes(&self) -> usize { - self.iter().map(|elt| elt.key_size_in_bytes()).sum() - } -} - -impl KeySize for ArrayValue { - fn key_size_in_bytes(&self) -> usize { - match self { - ArrayValue::Sum(elts) => elts.key_size_in_bytes(), - ArrayValue::Product(elts) => elts.key_size_in_bytes(), - ArrayValue::Bool(elts) => elts.key_size_in_bytes(), - ArrayValue::I8(elts) => elts.key_size_in_bytes(), - ArrayValue::U8(elts) => elts.key_size_in_bytes(), - ArrayValue::I16(elts) => elts.key_size_in_bytes(), - ArrayValue::U16(elts) => elts.key_size_in_bytes(), - ArrayValue::I32(elts) => elts.key_size_in_bytes(), - ArrayValue::U32(elts) => elts.key_size_in_bytes(), - ArrayValue::I64(elts) => elts.key_size_in_bytes(), - ArrayValue::U64(elts) => elts.key_size_in_bytes(), - ArrayValue::I128(elts) => elts.key_size_in_bytes(), - ArrayValue::U128(elts) => elts.key_size_in_bytes(), - ArrayValue::I256(elts) => elts.key_size_in_bytes(), - ArrayValue::U256(elts) => elts.key_size_in_bytes(), - ArrayValue::F32(elts) => elts.key_size_in_bytes(), - ArrayValue::F64(elts) => elts.key_size_in_bytes(), - ArrayValue::String(elts) => elts.key_size_in_bytes(), - ArrayValue::Array(elts) => elts.key_size_in_bytes(), - } - } -} - /// A B-Tree based index on a set of [`ColId`]s of a table. #[derive(Debug, PartialEq, Eq)] pub struct BTreeIndex { @@ -953,15 +836,7 @@ impl BTreeIndex { /// /// This method runs in constant time. /// - /// The key bytes of a value are defined depending on that value's type: - /// - Integer, float and boolean values take key bytes according to their [`std::mem::size_of`]. - /// - Strings take key bytes equal to their length in bytes. - /// No overhead is counted, unlike in the BFLATN or BSATN size. - /// - Sum values take 1 key byte for the tag, plus the key bytes of their active payload. 
- /// Inactive variants and padding are not counted, unlike in the BFLATN size. - /// - Product values take key bytes equal to the sum of their elements' key bytes. - /// Padding is not counted, unlike in the BFLATN size. - /// - Array values take key bytes equal to the sum of their elements' key bytes. + /// See the [`KeySize`] trait for more details on how this method computes its result. pub fn num_key_bytes(&self) -> u64 { self.num_key_bytes } diff --git a/crates/table/src/btree_index/key_size.rs b/crates/table/src/btree_index/key_size.rs new file mode 100644 index 00000000000..2128341f8e6 --- /dev/null +++ b/crates/table/src/btree_index/key_size.rs @@ -0,0 +1,143 @@ +use spacetimedb_sats::{ + algebraic_value::Packed, i256, u256, AlgebraicValue, ArrayValue, ProductValue, SumValue, F32, F64, +}; + +/// Index keys whose memory usage we can measure and report. +/// +/// The reported memory usage of an index is based on: +/// +/// - the number of entries in that index, i.e. the number of `RowPointer`s it stores, +/// - the total size of the keys for every entry in that index. +/// +/// This trait is used to measure the latter. +/// The metric we measure, sometimes called "data size," +/// is the number of live user-supplied bytes in the key. +/// This excludes padding and lengths, though it does include sum tags. +/// +/// The key size of a value is defined depending on that value's type: +/// - Integer, float and boolean values take bytes according to their [`std::mem::size_of`]. +/// - Strings take bytes equal to their length in bytes. +/// No overhead is counted, unlike in the BFLATN or BSATN size. +/// - Sum values take 1 byte for the tag, plus the bytes of their active payload. +/// Inactive variants and padding are not counted, unlike in the BFLATN size. +/// - Product values take bytes equal to the sum of their elements' bytes. +/// Padding is not counted, unlike in the BFLATN size. +/// - Array values take bytes equal to the sum of their elements' bytes. 
+/// As with strings, no overhead is counted. +pub trait KeySize { + fn key_size_in_bytes(&self) -> usize; +} + +macro_rules! impl_key_size_primitive { + ($prim:ty) => { + impl KeySize for $prim { + fn key_size_in_bytes(&self) -> usize { std::mem::size_of::() } + } + }; + ($($prim:ty,)*) => { + $(impl_key_size_primitive!($prim);)* + }; +} + +impl_key_size_primitive!( + bool, + u8, + i8, + u16, + i16, + u32, + i32, + u64, + i64, + u128, + i128, + Packed, + Packed, + u256, + i256, + F32, + F64, +); + +impl KeySize for Box { + fn key_size_in_bytes(&self) -> usize { + self.len() + } +} + +impl KeySize for AlgebraicValue { + fn key_size_in_bytes(&self) -> usize { + match self { + AlgebraicValue::Bool(x) => x.key_size_in_bytes(), + AlgebraicValue::U8(x) => x.key_size_in_bytes(), + AlgebraicValue::I8(x) => x.key_size_in_bytes(), + AlgebraicValue::U16(x) => x.key_size_in_bytes(), + AlgebraicValue::I16(x) => x.key_size_in_bytes(), + AlgebraicValue::U32(x) => x.key_size_in_bytes(), + AlgebraicValue::I32(x) => x.key_size_in_bytes(), + AlgebraicValue::U64(x) => x.key_size_in_bytes(), + AlgebraicValue::I64(x) => x.key_size_in_bytes(), + AlgebraicValue::U128(x) => x.key_size_in_bytes(), + AlgebraicValue::I128(x) => x.key_size_in_bytes(), + AlgebraicValue::U256(x) => x.key_size_in_bytes(), + AlgebraicValue::I256(x) => x.key_size_in_bytes(), + AlgebraicValue::F32(x) => x.key_size_in_bytes(), + AlgebraicValue::F64(x) => x.key_size_in_bytes(), + AlgebraicValue::String(x) => x.key_size_in_bytes(), + AlgebraicValue::Sum(x) => x.key_size_in_bytes(), + AlgebraicValue::Product(x) => x.key_size_in_bytes(), + AlgebraicValue::Array(x) => x.key_size_in_bytes(), + + AlgebraicValue::Min | AlgebraicValue::Max => unreachable!(), + } + } +} + +impl KeySize for SumValue { + fn key_size_in_bytes(&self) -> usize { + 1 + self.value.key_size_in_bytes() + } +} + +impl KeySize for ProductValue { + fn key_size_in_bytes(&self) -> usize { + self.elements.key_size_in_bytes() + } +} + +impl KeySize for [K] 
+where + K: KeySize, +{ + // TODO(perf, bikeshedding): check that this optimized to `size_of::() * self.len()` + // when `K` is a primitive. + fn key_size_in_bytes(&self) -> usize { + self.iter().map(|elt| elt.key_size_in_bytes()).sum() + } +} + +impl KeySize for ArrayValue { + fn key_size_in_bytes(&self) -> usize { + match self { + ArrayValue::Sum(elts) => elts.key_size_in_bytes(), + ArrayValue::Product(elts) => elts.key_size_in_bytes(), + ArrayValue::Bool(elts) => elts.key_size_in_bytes(), + ArrayValue::I8(elts) => elts.key_size_in_bytes(), + ArrayValue::U8(elts) => elts.key_size_in_bytes(), + ArrayValue::I16(elts) => elts.key_size_in_bytes(), + ArrayValue::U16(elts) => elts.key_size_in_bytes(), + ArrayValue::I32(elts) => elts.key_size_in_bytes(), + ArrayValue::U32(elts) => elts.key_size_in_bytes(), + ArrayValue::I64(elts) => elts.key_size_in_bytes(), + ArrayValue::U64(elts) => elts.key_size_in_bytes(), + ArrayValue::I128(elts) => elts.key_size_in_bytes(), + ArrayValue::U128(elts) => elts.key_size_in_bytes(), + ArrayValue::I256(elts) => elts.key_size_in_bytes(), + ArrayValue::U256(elts) => elts.key_size_in_bytes(), + ArrayValue::F32(elts) => elts.key_size_in_bytes(), + ArrayValue::F64(elts) => elts.key_size_in_bytes(), + ArrayValue::String(elts) => elts.key_size_in_bytes(), + ArrayValue::Array(elts) => elts.key_size_in_bytes(), + } + } +} From c7130ee6144a6ecdf5235181387dd1c63b7ed023 Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Fri, 17 Jan 2025 09:35:40 -0500 Subject: [PATCH 05/11] Blob store usages; hook up index usages --- crates/table/src/blob_store.rs | 21 +++++++++++++++++++++ crates/table/src/table.rs | 27 +++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/crates/table/src/blob_store.rs b/crates/table/src/blob_store.rs index 3431b5b35cf..52078072ba6 100644 --- a/crates/table/src/blob_store.rs +++ b/crates/table/src/blob_store.rs @@ -104,6 +104,27 @@ pub trait BlobStore: Sync { /// /// Used when capturing a 
snapshot. fn iter_blobs(&self) -> BlobsIter<'_>; + + /// Returns the amount of memory in bytes used by blobs in this `BlobStore`. + /// + /// Duplicate blobs are counted a number of times equal to their refcount. + /// This is in order to preserve the property that inserting a large blob + /// causes this quantity to increase by that blob's size, + /// and deleting a large blob causes it to decrease the same amount. + fn bytes_used_by_blobs(&self) -> u64 { + self.iter_blobs() + .map(|(_, uses, data)| data.len() as u64 * uses as u64) + .sum() + } + + /// Returns the number of blobs, or more precisely, blob-usages, recorded in this `BlobStore`. + /// + /// Duplicate blobs are counted a number of times equal to their refcount. + /// This is in order to preserve the property that inserting a large blob + /// causes this quantity to increase by 1, and deleting a large blob causes it to decrease by 1. + fn num_blobs(&self) -> u64 { + self.iter_blobs().map(|(_, uses, _)| uses as u64).sum() + } } /// A blob store that panics on all operations. diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index f8e42dc8466..bfa11b29abb 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -932,11 +932,11 @@ impl Table { /// - Unallocated space within pages. /// - Per-page overhead (e.g. page headers). /// - Table overhead (e.g. the [`RowTypeLayout`], [`PointerMap`], [`Schema`] &c). - /// - Indices. - // TODO(energy): count memory usage by indices. + /// - Indexes. /// - Large blobs in the [`BlobStore`]. /// /// Of these, the caller should inspect the blob store in order to account for memory usage by large blobs, + /// and call [`Self::bytes_used_by_index_keys`] to account for indexes, /// but we intend to eat all the other overheads when billing. 
pub fn bytes_used_by_rows(&self) -> u64 { self.pages() @@ -944,6 +944,29 @@ impl Table { .map(|page| page.bytes_used_by_rows(self.inner.row_layout.size()) as u64) .sum() } + + /// Returns the number of rows (or [`RowPointer`]s, more accurately) + /// stored in indexes by this table. + /// + /// This method runs in constant time. + pub fn num_rows_in_indexes(&self) -> u64 { + // Assume that each index contains all rows in the table. + self.num_rows() * self.indexes.len() as u64 + } + + /// Returns the number of bytes used by keys stored in indexes by this table. + /// + /// This method scales in runtime with the number of indexes in the table, + /// but not with the number of pages or rows. + /// + /// Key size is measured using a metric called "key size" or "data size," + /// which is intended to capture the number of live user-supplied bytes, + /// not including representational overhead. + /// This is distinct from the BFLATN size measured by [`Self::bytes_used_by_rows`]. + /// See the trait [`crate::btree_index::KeySize`] for specifics on the metric measured. + pub fn bytes_used_by_index_keys(&self) -> u64 { + self.indexes.iter().map(|(_, idx)| idx.num_key_bytes()).sum() + } } /// A reference to a single row within a table. From c4e35cbf89741ceb39723b57abfb10f35e2fdcfe Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Fri, 17 Jan 2025 10:05:36 -0500 Subject: [PATCH 06/11] clippy --- crates/table/src/table.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index bfa11b29abb..4519b1259d1 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -965,7 +965,7 @@ impl Table { /// This is distinct from the BFLATN size measured by [`Self::bytes_used_by_rows`]. /// See the trait [`crate::btree_index::KeySize`] for specifics on the metric measured. 
pub fn bytes_used_by_index_keys(&self) -> u64 { - self.indexes.iter().map(|(_, idx)| idx.num_key_bytes()).sum() + self.indexes.values().map(|idx| idx.num_key_bytes()).sum() } } From 4eb083c825b06d786785ad907279250afecc7529 Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Fri, 17 Jan 2025 10:31:44 -0500 Subject: [PATCH 07/11] Add and report data size metrics for `CommittedState` --- .../locking_tx_datastore/committed_state.rs | 33 +++++++++++++++ .../locking_tx_datastore/datastore.rs | 4 ++ crates/core/src/db/db_metrics/data_size.rs | 42 +++++++++++++++++++ crates/core/src/db/db_metrics/mod.rs | 2 + 4 files changed, 81 insertions(+) create mode 100644 crates/core/src/db/db_metrics/data_size.rs diff --git a/crates/core/src/db/datastore/locking_tx_datastore/committed_state.rs b/crates/core/src/db/datastore/locking_tx_datastore/committed_state.rs index 765d158e06d..df6bf334447 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/committed_state.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/committed_state.rs @@ -644,6 +644,39 @@ impl CommittedState { let index = table.indexes.get(col_list)?; Some(&index.key_type) } + + pub(super) fn report_data_size(&self, database_identity: Identity) { + use crate::db::db_metrics::data_size::DATA_SIZE_METRICS; + + for (table_id, table) in &self.tables { + let table_name = &table.schema.table_name; + DATA_SIZE_METRICS + .data_size_table_num_rows + .with_label_values(&database_identity, &table_id.0, table_name) + .set(table.num_rows() as _); + DATA_SIZE_METRICS + .data_size_table_bytes_used_by_rows + .with_label_values(&database_identity, &table_id.0, table_name) + .set(table.bytes_used_by_rows() as _); + DATA_SIZE_METRICS + .data_size_table_num_rows_in_indexes + .with_label_values(&database_identity, &table_id.0, table_name) + .set(table.num_rows_in_indexes() as _); + DATA_SIZE_METRICS + .data_size_table_bytes_used_by_index_keys + .with_label_values(&database_identity, &table_id.0, table_name) + 
.set(table.bytes_used_by_index_keys() as _); + } + + DATA_SIZE_METRICS + .data_size_blob_store_num_blobs + .with_label_values(&database_identity) + .set(self.blob_store.num_blobs() as _); + DATA_SIZE_METRICS + .data_size_blob_store_bytes_used_by_blobs + .with_label_values(&database_identity) + .set(self.blob_store.bytes_used_by_blobs() as _); + } } pub struct CommittedIndexIterWithDeletedMutTx<'a> { diff --git a/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs b/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs index 901dee59ea7..94aabb171a4 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs @@ -681,6 +681,10 @@ pub(super) fn record_metrics( .inc_by(deletes.len() as u64); } } + + if let Some(committed_state) = committed_state { + committed_state.report_data_size(*db); + } } impl MutTx for Locking { diff --git a/crates/core/src/db/db_metrics/data_size.rs b/crates/core/src/db/db_metrics/data_size.rs new file mode 100644 index 00000000000..09430010a32 --- /dev/null +++ b/crates/core/src/db/db_metrics/data_size.rs @@ -0,0 +1,42 @@ +use once_cell::sync::Lazy; +use prometheus::IntGaugeVec; +use spacetimedb_lib::Identity; +use spacetimedb_metrics::metrics_group; + +metrics_group!( + #[non_exhaustive] + pub struct DbDataSize { + #[name = spacetime_data_size_table_num_rows] + #[help = "The number of rows in a table"] + #[labels(db: Identity, table_id: u32, table_name: str)] + pub data_size_table_num_rows: IntGaugeVec, + + #[name = spacetime_data_size_bytes_used_by_rows] + #[help = "The number of bytes used by rows in pages in a table"] + #[labels(db: Identity, table_id: u32, table_name: str)] + pub data_size_table_bytes_used_by_rows: IntGaugeVec, + + #[name = spacetime_data_size_table_num_rows_in_indexes] + #[help = "The number of rows stored in indexes in a table"] + // TODO: Consider partitioning by index ID or index name. 
+        #[labels(db: Identity, table_id: u32, table_name: str)]
+        pub data_size_table_num_rows_in_indexes: IntGaugeVec,
+
+        #[name = spacetime_data_size_table_bytes_used_by_index_keys]
+        #[help = "The number of bytes used by keys stored in indexes in a table"]
+        #[labels(db: Identity, table_id: u32, table_name: str)]
+        pub data_size_table_bytes_used_by_index_keys: IntGaugeVec,
+
+        #[name = spacetime_data_size_blob_store_num_blobs]
+        #[help = "The number of large blobs stored in a database's blob store"]
+        #[labels(db: Identity)]
+        pub data_size_blob_store_num_blobs: IntGaugeVec,
+
+        #[name = spacetime_data_size_blob_store_bytes_used_by_blobs]
+        #[help = "The number of bytes used by large blobs stored in a database's blob store"]
+        #[labels(db: Identity)]
+        pub data_size_blob_store_bytes_used_by_blobs: IntGaugeVec,
+    }
+);
+
+pub static DATA_SIZE_METRICS: Lazy<DbDataSize> = Lazy::new(DbDataSize::new);
diff --git a/crates/core/src/db/db_metrics/mod.rs b/crates/core/src/db/db_metrics/mod.rs
index 8dd965fa200..98d1717e924 100644
--- a/crates/core/src/db/db_metrics/mod.rs
+++ b/crates/core/src/db/db_metrics/mod.rs
@@ -5,6 +5,8 @@ use spacetimedb_lib::Identity;
 use spacetimedb_metrics::metrics_group;
 use spacetimedb_primitives::TableId;
 
+pub mod data_size;
+
 metrics_group!(
     #[non_exhaustive]
     pub struct DbMetrics {

From f938df21efa4cb915c563403e5767338efd0fdb3 Mon Sep 17 00:00:00 2001
From: Phoebe Goldman
Date: Mon, 27 Jan 2025 11:49:00 -0500
Subject: [PATCH 08/11] First pass at testing

Slow reconstructions of `num_rows` and `bytes_used_by_rows`.

Still to follow: index usage reporting.
---
 crates/sats/src/proptest.rs | 12 ++++++++
 crates/table/src/page.rs    | 56 +++++++++++++++++++++++++++++++++++++
 crates/table/src/table.rs   | 38 ++++++++++++++++++++++++-
 3 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/crates/sats/src/proptest.rs b/crates/sats/src/proptest.rs
index 2b0c883b92b..11f09aca2bc 100644
--- a/crates/sats/src/proptest.rs
+++ b/crates/sats/src/proptest.rs
@@ -207,6 +207,18 @@ pub fn generate_typed_row() -> impl Strategy<Value = (ProductType, ProductValue)
+/// Generates a row type `ty` and a `Vec` of product values of that type.
+pub fn generate_typed_row_vec(
+    num_rows_min: usize,
+    num_rows_max: usize,
+) -> impl Strategy<Value = (ProductType, Vec<ProductValue>)> {
+    generate_row_type(0..=SIZE).prop_flat_map(move |ty| {
+        (
+            Just(ty.clone()),
+            vec(generate_product_value(ty), num_rows_min..num_rows_max),
+        )
+    })
+}
+
 /// Generates a type `ty` and a value typed at `ty`.
 pub fn generate_typed_value() -> impl Strategy<Value = (AlgebraicType, AlgebraicValue)> {
     generate_algebraic_type().prop_flat_map(|ty| (Just(ty.clone()), generate_algebraic_value(ty)))
diff --git a/crates/table/src/page.rs b/crates/table/src/page.rs
index 17dd24c97b5..b7069cd47f3 100644
--- a/crates/table/src/page.rs
+++ b/crates/table/src/page.rs
@@ -1129,6 +1129,16 @@ impl Page {
         self.header.fixed.num_rows as usize
     }
 
+    #[cfg(test)]
+    /// Use this page's present rows bitvec to compute the number of present rows.
+    ///
+    /// This can be compared with [`Self::num_rows`] as a consistency check during tests.
+    pub fn reconstruct_num_rows(&self) -> usize {
+        // If we cared, we could rewrite this to `u64::count_ones` on each block of the bitset.
+        // We do not care. This method is slow.
+        self.header.fixed.present_rows.iter_set().count()
+    }
+
     /// Returns the number of var-len granules allocated in this page.
     ///
     /// This method runs in constant time.
@@ -1136,6 +1146,34 @@
         self.header.var.num_granules as usize
     }
 
+    #[cfg(test)]
+    /// # Safety
+    ///
+    /// - `var_len_visitor` must be a valid [`VarLenMembers`] visitor
+    ///   specialized to the type and layout of rows within this [`Page`].
+ /// - `fixed_row_size` must be exactly the length in bytes of fixed rows in this page, + /// which must further be the length of rows expected by the `var_len_visitor`. + pub unsafe fn reconstruct_num_var_len_granules( + &self, + fixed_row_size: Size, + var_len_visitor: &impl VarLenMembers, + ) -> usize { + self.iter_fixed_len(fixed_row_size) + .flat_map(|row| unsafe { + // Safety: `row` came out of `iter_fixed_len`, + // which, due to caller requirements on `fixed_row_size`, + // is giving us valid, aligned, initialized rows of the row type. + var_len_visitor.visit_var_len(self.get_row_data(row, fixed_row_size)) + }) + .flat_map(|var_len_obj| unsafe { + // Safety: We believe `row` to be valid + // and `var_len_visitor` to be correctly visiting its var-len members. + // Therefore, `var_len_obj` is a valid var-len object. + self.iter_var_len_object(var_len_obj.first_granule) + }) + .count() + } + /// Returns the number of bytes used by rows stored in this page. /// /// This is necessarily an overestimate of live data bytes, as it includes: @@ -1155,6 +1193,24 @@ impl Page { fixed_row_bytes + var_len_bytes } + #[cfg(test)] + /// # Safety + /// + /// - `var_len_visitor` must be a valid [`VarLenMembers`] visitor + /// specialized to the type and layout of rows within this [`Page`]. + /// - `fixed_row_size` must be exactly the length in bytes of fixed rows in this page, + /// which must further be the length of rows expected by the `var_len_visitor`. + pub unsafe fn reconstruct_bytes_used_by_rows( + &self, + fixed_row_size: Size, + var_len_visitor: &impl VarLenMembers, + ) -> usize { + let fixed_row_bytes = self.reconstruct_num_rows() * fixed_row_size.len(); + let var_len_bytes = unsafe { self.reconstruct_num_var_len_granules(fixed_row_size, var_len_visitor) } + * VarLenGranule::SIZE.len(); + fixed_row_bytes + var_len_bytes + } + /// Returns the range of row data starting at `offset` and lasting `size` bytes. 
    pub fn get_row_data(&self, row: PageOffset, size: Size) -> &Bytes {
        &self.row_data[row.range(size)]
diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs
index ef109d16cb2..f5811cbaf32 100644
--- a/crates/table/src/table.rs
+++ b/crates/table/src/table.rs
@@ -1088,6 +1088,11 @@ impl Table {
         self.pages().iter().map(|page| page.num_rows() as u64).sum()
     }
 
+    #[cfg(test)]
+    fn reconstruct_num_rows(&self) -> u64 {
+        self.pages().iter().map(|page| page.reconstruct_num_rows() as u64).sum()
+    }
+
     /// Returns the number of bytes used by rows resident in this table.
     ///
     /// This includes data bytes, padding bytes and some overhead bytes,
@@ -1110,6 +1115,18 @@
             .sum()
     }
 
+    #[cfg(test)]
+    fn reconstruct_bytes_used_by_rows(&self) -> u64 {
+        self.pages()
+            .iter()
+            .map(|page| unsafe {
+                // Safety: `page` is in `self`, and was constructed using `self.inner.row_layout` and `self.inner.visitor_prog`,
+                // so the three are mutually consistent.
+                page.reconstruct_bytes_used_by_rows(self.inner.row_layout.size(), &self.inner.visitor_prog)
+            } as u64)
+            .sum()
+    }
+
     /// Returns the number of rows (or [`RowPointer`]s, more accurately)
     /// stored in indexes by this table.
/// @@ -1701,7 +1718,7 @@ pub(crate) mod test { use spacetimedb_lib::db::raw_def::v9::{RawIndexAlgorithm, RawModuleDefV9Builder}; use spacetimedb_primitives::{col_list, TableId}; use spacetimedb_sats::bsatn::to_vec; - use spacetimedb_sats::proptest::generate_typed_row; + use spacetimedb_sats::proptest::{generate_typed_row, generate_typed_row_vec}; use spacetimedb_sats::{product, AlgebraicType, ArrayValue}; use spacetimedb_schema::def::ModuleDef; use spacetimedb_schema::schema::Schema as _; @@ -1907,6 +1924,25 @@ pub(crate) mod test { prop_assert_eq!(bs_pv, bs_bsatn); prop_assert_eq!(table_pv, table_bsatn); } + + #[test] + fn row_size_reporting_matches_slow_implementations((ty, vals) in generate_typed_row_vec(128, 2048)) { + let mut blob_store = HashMapBlobStore::default(); + let mut table = table(ty.clone()); + + for row in vals { + prop_assume!(table.insert(&mut blob_store, &row).is_ok()); + } + + prop_assert_eq!(table.bytes_used_by_rows(), table.reconstruct_bytes_used_by_rows()); + prop_assert_eq!(table.num_rows(), table.reconstruct_num_rows()); + + // TODO(testing): Determine if there's a meaningful way to test that the blob store reporting is correct. + // I (pgoldman 2025-01-27) doubt it, as the test would be "visit every blob and sum their size," + // which is already what the actual implementation does. + + // TODO(testing): Put one or more indexes on `table` and verify that they report the right usage. 
+ } } fn insert_bsatn<'a>( From 7b4a4e54de7eb9a1a91720c2236800e5264aa1c1 Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Mon, 27 Jan 2025 14:44:42 -0500 Subject: [PATCH 09/11] Test that single-column indexes report usage as expected --- crates/table/src/table.rs | 66 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs index f5811cbaf32..e48fb6a1c12 100644 --- a/crates/table/src/table.rs +++ b/crates/table/src/table.rs @@ -1836,6 +1836,19 @@ pub(crate) mod test { insert_retrieve_body(ty, AlgebraicValue::from(arr)).unwrap(); } + fn reconstruct_index_num_key_bytes(table: &Table, blob_store: &dyn BlobStore, index_id: IndexId) -> u64 { + let index = table.get_index_by_id(index_id).unwrap(); + + index + .seek(&(..)) + .map(|row_ptr| { + let row_ref = table.get_row_ref(blob_store, row_ptr).unwrap(); + let key = row_ref.project(&index.indexed_columns).unwrap(); + crate::btree_index::KeySize::key_size_in_bytes(&key) as u64 + }) + .sum() + } + proptest! { #![proptest_config(ProptestConfig { max_shrink_iters: 0x10000000, ..Default::default() })] @@ -1930,18 +1943,65 @@ pub(crate) mod test { let mut blob_store = HashMapBlobStore::default(); let mut table = table(ty.clone()); - for row in vals { - prop_assume!(table.insert(&mut blob_store, &row).is_ok()); + for row in &vals { + prop_assume!(table.insert(&mut blob_store, row).is_ok()); } prop_assert_eq!(table.bytes_used_by_rows(), table.reconstruct_bytes_used_by_rows()); prop_assert_eq!(table.num_rows(), table.reconstruct_num_rows()); + prop_assert_eq!(table.num_rows(), vals.len() as u64); // TODO(testing): Determine if there's a meaningful way to test that the blob store reporting is correct. // I (pgoldman 2025-01-27) doubt it, as the test would be "visit every blob and sum their size," // which is already what the actual implementation does. 
+ } + + #[test] + fn index_size_reporting_matches_slow_implementations_single_column((ty, vals) in generate_typed_row_vec(128, 2048)) { + let mut blob_store = HashMapBlobStore::default(); + let mut table = table(ty.clone()); + + for row in &vals { + prop_assume!(table.insert(&mut blob_store, row).is_ok()); + } + + // We haven't added any indexes yet, so there should be 0 rows in indexes. + prop_assert_eq!(table.num_rows_in_indexes(), 0); + + let index_id = IndexId(0); + + // Add an index on column 0. + table.insert_index(&mut blob_store, index_id, BTreeIndex::new(&ty, ColList::from(ColId(0)), false).unwrap()); + + // We have one index, which should be fully populated, + // so in total we should have the same number of rows in indexes as we have rows. + prop_assert_eq!(table.num_rows_in_indexes(), table.num_rows()); + + let index = table.get_index_by_id(index_id).unwrap(); + + // One index, so table's reporting of bytes used should match that index's reporting. + prop_assert_eq!(table.bytes_used_by_index_keys(), index.num_key_bytes()); + + // Walk all the rows in the index, sum their key size, + // and assert it matches the `index.num_key_bytes()` + prop_assert_eq!( + index.num_key_bytes(), + reconstruct_index_num_key_bytes(&table, &blob_store, index_id) + ); + + // Walk all the rows we inserted, project them to the cols that will be their keys, + // sum their key size, + // and assert it matches the `index.num_key_bytes()` + let key_size_in_pvs = vals.iter().map(|row| { + crate::btree_index::KeySize::key_size_in_bytes(&row.elements[0]) as u64 + }).sum(); + prop_assert_eq!(index.num_key_bytes(), key_size_in_pvs); + + // Add a duplicate of the same index, so we can check that all above quantities double. + table.insert_index(&mut blob_store, IndexId(1), BTreeIndex::new(&ty, ColList::from(ColId(0)), false).unwrap()); - // TODO(testing): Put one or more indexes on `table` and verify that they report the right usage. 
+            prop_assert_eq!(table.num_rows_in_indexes(), table.num_rows() * 2);
+            prop_assert_eq!(table.bytes_used_by_index_keys(), key_size_in_pvs * 2);
     }
 }

From 93cbda0b97cbd118a5cd513952426fc5ce9ae4de Mon Sep 17 00:00:00 2001
From: Phoebe Goldman
Date: Mon, 27 Jan 2025 16:49:24 -0500
Subject: [PATCH 10/11] Also test for two-column indexes

---
 crates/table/src/table.rs | 113 ++++++++++++++++++++++++--------------
 1 file changed, 73 insertions(+), 40 deletions(-)

diff --git a/crates/table/src/table.rs b/crates/table/src/table.rs
index e48fb6a1c12..6888f9b1039 100644
--- a/crates/table/src/table.rs
+++ b/crates/table/src/table.rs
@@ -1849,6 +1849,72 @@
             .sum()
     }
 
+    /// Given a row type `ty`, a set of rows of that type `vals`,
+    /// and a set of columns within that type `indexed_columns`,
+    /// populate a table with `vals`, add an index on the `indexed_columns`,
+    /// and perform various assertions that the reported index size metrics are correct.
+    fn test_index_size_reporting(
+        ty: ProductType,
+        vals: Vec<ProductValue>,
+        indexed_columns: ColList,
+    ) -> Result<(), TestCaseError> {
+        let mut blob_store = HashMapBlobStore::default();
+        let mut table = table(ty.clone());
+
+        for row in &vals {
+            prop_assume!(table.insert(&mut blob_store, row).is_ok());
+        }
+
+        // We haven't added any indexes yet, so there should be 0 rows in indexes.
+        prop_assert_eq!(table.num_rows_in_indexes(), 0);
+
+        let index_id = IndexId(0);
+
+        // Add an index on the `indexed_columns`.
+        table.insert_index(
+            &blob_store,
+            index_id,
+            BTreeIndex::new(&ty, indexed_columns.clone(), false).unwrap(),
+        );
+
+        // We have one index, which should be fully populated,
+        // so in total we should have the same number of rows in indexes as we have rows.
+        prop_assert_eq!(table.num_rows_in_indexes(), table.num_rows());
+
+        let index = table.get_index_by_id(index_id).unwrap();
+
+        // One index, so table's reporting of bytes used should match that index's reporting.
+ prop_assert_eq!(table.bytes_used_by_index_keys(), index.num_key_bytes()); + + // Walk all the rows in the index, sum their key size, + // and assert it matches the `index.num_key_bytes()` + prop_assert_eq!( + index.num_key_bytes(), + reconstruct_index_num_key_bytes(&table, &blob_store, index_id) + ); + + // Walk all the rows we inserted, project them to the cols that will be their keys, + // sum their key size, + // and assert it matches the `index.num_key_bytes()` + let key_size_in_pvs = vals + .iter() + .map(|row| crate::btree_index::KeySize::key_size_in_bytes(&row.project(&indexed_columns).unwrap()) as u64) + .sum(); + prop_assert_eq!(index.num_key_bytes(), key_size_in_pvs); + + // Add a duplicate of the same index, so we can check that all above quantities double. + table.insert_index( + &blob_store, + IndexId(1), + BTreeIndex::new(&ty, indexed_columns, false).unwrap(), + ); + + prop_assert_eq!(table.num_rows_in_indexes(), table.num_rows() * 2); + prop_assert_eq!(table.bytes_used_by_index_keys(), key_size_in_pvs * 2); + + Ok(()) + } + proptest! { #![proptest_config(ProptestConfig { max_shrink_iters: 0x10000000, ..Default::default() })] @@ -1958,50 +2024,17 @@ pub(crate) mod test { #[test] fn index_size_reporting_matches_slow_implementations_single_column((ty, vals) in generate_typed_row_vec(128, 2048)) { - let mut blob_store = HashMapBlobStore::default(); - let mut table = table(ty.clone()); - - for row in &vals { - prop_assume!(table.insert(&mut blob_store, row).is_ok()); - } + prop_assume!(!ty.elements.is_empty()); - // We haven't added any indexes yet, so there should be 0 rows in indexes. - prop_assert_eq!(table.num_rows_in_indexes(), 0); - - let index_id = IndexId(0); - - // Add an index on column 0. - table.insert_index(&mut blob_store, index_id, BTreeIndex::new(&ty, ColList::from(ColId(0)), false).unwrap()); - - // We have one index, which should be fully populated, - // so in total we should have the same number of rows in indexes as we have rows. 
- prop_assert_eq!(table.num_rows_in_indexes(), table.num_rows()); - - let index = table.get_index_by_id(index_id).unwrap(); - - // One index, so table's reporting of bytes used should match that index's reporting. - prop_assert_eq!(table.bytes_used_by_index_keys(), index.num_key_bytes()); - - // Walk all the rows in the index, sum their key size, - // and assert it matches the `index.num_key_bytes()` - prop_assert_eq!( - index.num_key_bytes(), - reconstruct_index_num_key_bytes(&table, &blob_store, index_id) - ); + test_index_size_reporting(ty, vals, ColList::from(ColId(0)))?; + } - // Walk all the rows we inserted, project them to the cols that will be their keys, - // sum their key size, - // and assert it matches the `index.num_key_bytes()` - let key_size_in_pvs = vals.iter().map(|row| { - crate::btree_index::KeySize::key_size_in_bytes(&row.elements[0]) as u64 - }).sum(); - prop_assert_eq!(index.num_key_bytes(), key_size_in_pvs); + #[test] + fn index_size_reporting_matches_slow_implementations_two_column((ty, vals) in generate_typed_row_vec(128, 2048)) { + prop_assume!(ty.elements.len() >= 2); - // Add a duplicate of the same index, so we can check that all above quantities double. 
- table.insert_index(&mut blob_store, IndexId(1), BTreeIndex::new(&ty, ColList::from(ColId(0)), false).unwrap()); - prop_assert_eq!(table.num_rows_in_indexes(), table.num_rows() * 2); - prop_assert_eq!(table.bytes_used_by_index_keys(), key_size_in_pvs * 2); + test_index_size_reporting(ty, vals, ColList::from([ColId(0), ColId(1)]))?; } } From 81906253adeba36f8b43f24fed6534e220223b82 Mon Sep 17 00:00:00 2001 From: Phoebe Goldman Date: Tue, 28 Jan 2025 10:32:09 -0500 Subject: [PATCH 11/11] Add TODO note in response to jeff's comment --- crates/core/src/db/datastore/locking_tx_datastore/datastore.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs b/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs index 52ff117033c..8ab90f2b07b 100644 --- a/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs +++ b/crates/core/src/db/datastore/locking_tx_datastore/datastore.rs @@ -686,6 +686,8 @@ pub(super) fn record_metrics( } if let Some(committed_state) = committed_state { + // TODO(cleanliness,bikeshedding): Consider inlining `report_data_size` here, + // or moving the above metric writes into it, for consistency of organization. committed_state.report_data_size(*db); } }