Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
Centril committed Jan 28, 2025
1 parent 2e99e55 commit 3c8135b
Show file tree
Hide file tree
Showing 8 changed files with 449 additions and 56 deletions.
107 changes: 82 additions & 25 deletions crates/bench/benches/delete_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,22 @@ fn gen_row_pointers(iters: u64) -> impl Iterator<Item = RowPointer> {
}

fn bench_custom(g: &mut BenchmarkGroup<'_, WallTime>, name: &str, run: impl Fn(u64) -> Duration) {
g.bench_function(name, |b| b.iter_custom(|i| run(i)));
g.bench_function(name, |b| b.iter_custom(&run));
}

fn bench_delete_table<DT: DeleteTable>(c: &mut Criterion) {
let name = DT::NAME;
let mut g = c.benchmark_group(name);
let row_size = black_box(FIXED_ROW_SIZE);
let new_dt = || DT::new(row_size);
bench_custom(&mut g, "mixed", |i| {
let mut dt = DT::new(FIXED_ROW_SIZE);
let mut dt = new_dt();
gen_row_pointers(i)
.map(|ptr| time(|| dt.contains(ptr)) + time(|| dt.insert(ptr)))
.sum()
});
bench_custom(&mut g, "mixed_random", |i| {
let mut dt = DT::new(FIXED_ROW_SIZE);
let mut dt = new_dt();
let mut ptrs = gen_row_pointers(i).collect_vec();
let mut rng = ThreadRng::default();
ptrs.shuffle(&mut rng);
Expand All @@ -68,11 +70,11 @@ fn bench_delete_table<DT: DeleteTable>(c: &mut Criterion) {
.sum()
});
bench_custom(&mut g, "insert", |i| {
let mut dt = DT::new(FIXED_ROW_SIZE);
let mut dt = new_dt();
gen_row_pointers(i).map(|ptr| time(|| dt.insert(ptr))).sum()
});
bench_custom(&mut g, "contains_for_half", |i| {
let mut dt = DT::new(FIXED_ROW_SIZE);
let mut dt = new_dt();
gen_row_pointers(i)
.enumerate()
.map(|(i, ptr)| {
Expand All @@ -84,7 +86,7 @@ fn bench_delete_table<DT: DeleteTable>(c: &mut Criterion) {
.sum()
});
bench_custom(&mut g, "contains_for_full", |i| {
let mut dt = DT::new(FIXED_ROW_SIZE);
let mut dt = new_dt();
gen_row_pointers(i)
.map(|ptr| {
black_box(dt.insert(ptr));
Expand All @@ -93,14 +95,14 @@ fn bench_delete_table<DT: DeleteTable>(c: &mut Criterion) {
.sum()
});
bench_custom(&mut g, "remove", |i| {
let mut dt = DT::new(FIXED_ROW_SIZE);
let mut dt = new_dt();
for ptr in gen_row_pointers(i) {
black_box(dt.insert(ptr));
}
gen_row_pointers(i).map(|ptr| time(|| dt.remove(ptr))).sum()
});
bench_custom(&mut g, "iter", |i| {
let mut dt = DT::new(FIXED_ROW_SIZE);
let mut dt = new_dt();
for ptr in gen_row_pointers(i) {
black_box(dt.insert(ptr));
}
Expand All @@ -113,9 +115,10 @@ trait DeleteTable {
const NAME: &'static str;
fn new(fixed_row_size: Size) -> Self;
fn contains(&self, ptr: RowPointer) -> bool;
fn insert(&mut self, ptr: RowPointer);
fn insert(&mut self, ptr: RowPointer) -> bool;
fn remove(&mut self, ptr: RowPointer);
fn iter(&self) -> impl Iterator<Item = RowPointer>;
fn len(&self) -> usize;
}

/// `DeleteTable` implementation backed by a `BTreeSet` of row pointers.
struct DTBTree(BTreeSet<RowPointer>);
Expand All @@ -128,15 +131,18 @@ impl DeleteTable for DTBTree {
fn contains(&self, ptr: RowPointer) -> bool {
self.0.contains(&ptr)
}
fn insert(&mut self, ptr: RowPointer) {
self.0.insert(ptr);
fn insert(&mut self, ptr: RowPointer) -> bool {
self.0.insert(ptr)
}
fn remove(&mut self, ptr: RowPointer) {
self.0.remove(&ptr);
}
fn iter(&self) -> impl Iterator<Item = RowPointer> {
self.0.iter().copied()
}
fn len(&self) -> usize {
self.0.len()
}
}

/// `DeleteTable` implementation backed by a `HashSet` of row pointers.
struct DTHashSet(HashSet<RowPointer>);
Expand All @@ -149,15 +155,18 @@ impl DeleteTable for DTHashSet {
fn contains(&self, ptr: RowPointer) -> bool {
self.0.contains(&ptr)
}
fn insert(&mut self, ptr: RowPointer) {
self.0.insert(ptr);
fn insert(&mut self, ptr: RowPointer) -> bool {
self.0.insert(ptr)
}
fn remove(&mut self, ptr: RowPointer) {
self.0.remove(&ptr);
}
fn iter(&self) -> impl Iterator<Item = RowPointer> {
self.0.iter().copied()
}
fn len(&self) -> usize {
self.0.len()
}
}

/// `DeleteTable` implementation backed by a `foldhash::HashSet` of row pointers.
struct DTHashSetFH(foldhash::HashSet<RowPointer>);
Expand All @@ -170,19 +179,23 @@ impl DeleteTable for DTHashSetFH {
fn contains(&self, ptr: RowPointer) -> bool {
self.0.contains(&ptr)
}
fn insert(&mut self, ptr: RowPointer) {
self.0.insert(ptr);
fn insert(&mut self, ptr: RowPointer) -> bool {
self.0.insert(ptr)
}
fn remove(&mut self, ptr: RowPointer) {
self.0.remove(&ptr);
}
fn iter(&self) -> impl Iterator<Item = RowPointer> {
self.0.iter().copied()
}
fn len(&self) -> usize {
self.0.len()
}
}

/// `DeleteTable` implementation that keeps one optional bitset per page.
struct DTPageAndBitSet {
/// Indexed by page index. A slot is `None` until a pointer on that page
/// is first inserted; afterwards it holds a bitset in which bit
/// `page_offset / fixed_row_size` marks a stored row pointer.
deleted: Vec<Option<FixedBitSet>>,
/// Count of set bits across all pages: bumped by `insert` when a bit is
/// newly set and decremented by `remove` when a set bit is cleared.
len: usize,
/// Row size used to turn a page offset into a bit index and to size
/// each newly allocated bitset.
fixed_row_size: Size,
}

Expand All @@ -191,6 +204,7 @@ impl DeleteTable for DTPageAndBitSet {
/// Creates an empty table for rows of `fixed_row_size`.
///
/// No page slots are allocated up front and the element count starts at zero.
fn new(fixed_row_size: Size) -> Self {
    let deleted = Vec::new();
    let len = 0;
    Self {
        deleted,
        len,
        fixed_row_size,
    }
}
Expand All @@ -201,19 +215,28 @@ impl DeleteTable for DTPageAndBitSet {
_ => false,
}
}
fn insert(&mut self, ptr: RowPointer) {
fn insert(&mut self, ptr: RowPointer) -> bool {
let fixed_row_size = self.fixed_row_size;
let page_idx = ptr.page_index().idx();
let bitset_idx = ptr.page_offset() / fixed_row_size;

let new_set = || FixedBitSet::new(PageOffset::PAGE_END.idx().div_ceil(fixed_row_size.len()));

match self.deleted.get_mut(page_idx) {
Some(Some(set)) => set.set(bitset_idx, true),
Some(Some(set)) => {
let added = !set.get(bitset_idx);
set.set(bitset_idx, true);
if added {
self.len += 1;
}
added
}
Some(slot) => {
let mut set = new_set();
set.set(bitset_idx, true);
*slot = Some(set);
self.len += 1;
true
}
None => {
let pages = self.deleted.len();
Expand All @@ -225,6 +248,8 @@ impl DeleteTable for DTPageAndBitSet {
let mut set = new_set();
set.set(bitset_idx, true);
self.deleted.push(Some(set));
self.len += 1;
true
}
}
}
Expand All @@ -233,6 +258,9 @@ impl DeleteTable for DTPageAndBitSet {
let page_idx = ptr.page_index().idx();
let bitset_idx = ptr.page_offset() / fixed_row_size;
if let Some(Some(set)) = self.deleted.get_mut(page_idx) {
if set.get(bitset_idx) {
self.len -= 1;
}
set.set(bitset_idx, false);
}
}
Expand All @@ -248,6 +276,9 @@ impl DeleteTable for DTPageAndBitSet {
})
})
}
fn len(&self) -> usize {
self.len
}
}

#[derive(Clone, Copy)]
Expand All @@ -266,6 +297,7 @@ impl OffsetRange {
type OffsetRanges = SmallVec<[OffsetRange; 4]>;
/// `DeleteTable` implementation that keeps a list of offset ranges per page.
struct DTPageAndOffsetRanges {
/// Indexed by page index; each entry is that page's list of
/// `OffsetRange`s, which `insert` locates and extends via binary search.
deleted: Vec<OffsetRanges>,
/// Running count of stored row pointers: bumped by `insert` when a
/// pointer is newly added and decremented by `remove`.
len: usize,
/// Row size used to decide whether an offset is directly adjacent to an
/// existing range and can therefore extend it.
fixed_row_size: Size,
}

Expand All @@ -285,23 +317,31 @@ fn find_range_to_insert_offset(
ranges: &OffsetRanges,
offset: PageOffset,
fixed_row_size: Size,
) -> Result<(bool, usize), usize> {
) -> Result<(bool, bool, usize), usize> {
let mut extend_end = true;
let mut exists = false;
ranges
.binary_search_by(|&OffsetRange { start, end }| {
extend_end = true;
exists = false;
match end.cmp(&offset) {
// `end + row_size = offset` => we can just extend `end = offset`.
Ordering::Less if end.0 + fixed_row_size.0 == offset.0 => Ordering::Equal,
// Cannot extend this range, so let's not find it.
Ordering::Less => Ordering::Less,
// `offset` is already covered, so don't do anything,
// but `end = offset` is a no-op.
Ordering::Equal => Ordering::Equal,
Ordering::Equal => {
exists = true;
Ordering::Equal
}
// `end` is greater, but we may be covered by `start` instead.
Ordering::Greater => match start.cmp(&offset) {
// `offset` is within the range, so don't do anything.
Ordering::Less | Ordering::Equal => Ordering::Equal,
Ordering::Less | Ordering::Equal => {
exists = true;
Ordering::Equal
}
// `start - row_size = offset` => we can just extend `start = offset`.
Ordering::Greater if start.0 - fixed_row_size.0 == offset.0 => {
extend_end = false;
Expand All @@ -312,14 +352,15 @@ fn find_range_to_insert_offset(
},
}
})
.map(|idx| (extend_end, idx))
.map(|idx| (extend_end, exists, idx))
}

impl DeleteTable for DTPageAndOffsetRanges {
const NAME: &'static str = "DTPageAndOffsetRanges";
/// Creates an empty table for rows of `fixed_row_size`.
///
/// No per-page range lists exist yet and the element count starts at zero.
fn new(fixed_row_size: Size) -> Self {
    let deleted = Vec::new();
    let len = 0;
    Self {
        deleted,
        len,
        fixed_row_size,
    }
}
Expand All @@ -333,7 +374,7 @@ impl DeleteTable for DTPageAndOffsetRanges {
_ => false,
}
}
fn insert(&mut self, ptr: RowPointer) {
fn insert(&mut self, ptr: RowPointer) -> bool {
let fixed_row_size = self.fixed_row_size;
let page_idx = ptr.page_index().idx();
let page_offset = ptr.page_offset();
Expand All @@ -346,17 +387,20 @@ impl DeleteTable for DTPageAndOffsetRanges {
self.deleted.push(SmallVec::new());
}
self.deleted[page_idx].push(OffsetRange::point(page_offset));
return;
self.len += 1;
return true;
};

let (extend_end, range_idx) = match find_range_to_insert_offset(ranges, page_offset, fixed_row_size) {
let (extend_end, exists, range_idx) = match find_range_to_insert_offset(ranges, page_offset, fixed_row_size) {
Err(range_idx) => {
// Not found, so add a point range.
ranges.insert(range_idx, OffsetRange::point(page_offset));
return;
self.len += 1;
return true;
}
Ok(x) => x,
};

if extend_end {
let next = range_idx + 1;
let new_end = if let Some(r) = ranges
Expand All @@ -383,6 +427,12 @@ impl DeleteTable for DTPageAndOffsetRanges {
ranges[range_idx].start = page_offset;
};
}

let added = !exists;
if added {
self.len += 1;
}
added
}
fn remove(&mut self, ptr: RowPointer) {
let fixed_row_size = self.fixed_row_size;
Expand All @@ -396,6 +446,8 @@ impl DeleteTable for DTPageAndOffsetRanges {
return;
};

self.len -= 1;

let range = &mut ranges[idx];
let is_start = range.start == page_offset;
let is_end = range.end == page_offset;
Expand Down Expand Up @@ -431,14 +483,19 @@ impl DeleteTable for DTPageAndOffsetRanges {
.map(move |po| RowPointer::new(false, pi, po, SquashedOffset::COMMITTED_STATE))
})
}
fn len(&self) -> usize {
self.len
}
}

// Declares the `delete_table` benchmark group. Every implementation except
// `DTPageAndOffsetRanges` is inside the block comment below, so only that
// one is registered.
criterion_group!(
delete_table,
/*
bench_delete_table::<DTBTree>,
bench_delete_table::<DTHashSet>,
bench_delete_table::<DTHashSetFH>,
bench_delete_table::<DTPageAndBitSet>,
*/
bench_delete_table::<DTPageAndOffsetRanges>, // best so far.
);
// Expands to the benchmark entry point that runs the `delete_table` group.
criterion_main!(delete_table);
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use super::{
datastore::Result,
delete_table::DeleteTable,
sequence::{Sequence, SequencesState},
state_view::{IterByColRangeTx, StateView},
tx_state::{DeleteTable, IndexIdMap, RemovedIndexIdSet, TxState},
tx_state::{IndexIdMap, RemovedIndexIdSet, TxState},
IterByColEqTx,
};
use crate::{
Expand Down Expand Up @@ -546,7 +547,7 @@ impl CommittedState {
// holds only committed rows which should be deleted,
// i.e. `RowPointer`s with `SquashedOffset::COMMITTED_STATE`,
// so no need to check before applying the deletes.
for row_ptr in row_ptrs.iter().copied() {
for row_ptr in row_ptrs.iter() {
debug_assert!(row_ptr.squashed_offset().is_committed_state());

// TODO: re-write `TxData` to remove `ProductValue`s
Expand Down Expand Up @@ -698,6 +699,6 @@ impl<'a> Iterator for CommittedIndexIterWithDeletedMutTx<'a> {

fn next(&mut self) -> Option<Self::Item> {
self.committed_rows
.find(|row_ref| !self.del_table.contains(&row_ref.pointer()))
.find(|row_ref| !self.del_table.contains(row_ref.pointer()))
}
}
Loading

0 comments on commit 3c8135b

Please sign in to comment.