Fix performance issue with many duplicate ids.
rkistner committed Oct 15, 2024
1 parent aa09d5d commit 0f56b71
Showing 1 changed file with 3 additions and 7 deletions.
crates/core/src/sync_local.rs: 3 additions & 7 deletions
@@ -55,21 +55,17 @@ pub fn sync_local(db: *mut sqlite::sqlite3, _data: &str) -> Result<i64, SQLiteEr
 
     // Query for updated objects
 
-    // QUERY PLAN
-    // |--SCAN buckets
-    // |--SEARCH b USING INDEX ps_oplog_by_opid (bucket=? AND op_id>?)
-    // |--SEARCH r USING INDEX ps_oplog_by_row (row_type=? AND row_id=?)
-    // `--USE TEMP B-TREE FOR GROUP BY
     // language=SQLite
     let statement = db
         .prepare_v2(
             "\
 -- 1. Filter oplog by the ops added but not applied yet (oplog b).
+-- SELECT DISTINCT / UNION is important for cases with many duplicate ids.
 WITH updated_rows AS (
-  SELECT b.row_type, b.row_id FROM ps_buckets AS buckets
+  SELECT DISTINCT b.row_type, b.row_id FROM ps_buckets AS buckets
     CROSS JOIN ps_oplog AS b ON b.bucket = buckets.id
     AND (b.op_id > buckets.last_applied_op)
-  UNION ALL SELECT row_type, row_id FROM ps_updated_rows
+  UNION SELECT row_type, row_id FROM ps_updated_rows
 )
 -- 3. Group the objects from different buckets together into a single one (ops).
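
A note on why this helps (added context, not part of the commit): with many duplicate ids, UNION ALL passes every duplicate (row_type, row_id) pair from overlapping buckets through to the rest of the query, whereas SELECT DISTINCT and UNION collapse the duplicates up front. A minimal SQLite sketch of the difference, using hypothetical tables in place of ps_oplog and ps_updated_rows:

-- Hypothetical stand-ins for the oplog and updated-rows tables.
CREATE TABLE oplog_rows (row_type TEXT, row_id TEXT);
CREATE TABLE updated_rows (row_type TEXT, row_id TEXT);

INSERT INTO oplog_rows VALUES ('todo', '1'), ('todo', '1'), ('todo', '2');
INSERT INTO updated_rows VALUES ('todo', '2'), ('todo', '3');

-- Old shape: UNION ALL keeps duplicates, yielding 5 rows.
SELECT row_type, row_id FROM oplog_rows
UNION ALL
SELECT row_type, row_id FROM updated_rows;

-- New shape: DISTINCT + UNION deduplicates, yielding 3 rows, one per id.
SELECT DISTINCT row_type, row_id FROM oplog_rows
UNION
SELECT row_type, row_id FROM updated_rows;

Each deduplicated row then only has to be processed once by the rest of the query instead of once per duplicate, which is presumably where the win comes from when ids repeat heavily.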
